diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9178,17 +9178,59 @@
   bool BVNIsConstantSplat =
       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+  bool LE = Subtarget.isLittleEndian();
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
   // double. This is to exploit the XXSPLTIDP instruction.
-  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
-      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
-      convertToNonDenormSingle(APSplatBits)) {
-    SDValue SplatNode = DAG.getNode(
-        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
-        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
-    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  // If we lose precision, we use XXSPLTI32DX.
+  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+      Subtarget.hasPrefixInstrs()) {
+    // Check the type before calling convertToNonDenormSingle(): && then
+    // short-circuits for non-v2f64 splats, so APSplatBits still holds the
+    // full 64-bit pattern when the XXSPLTI32DX path below reads it.
+    // NOTE(review): assumes convertToNonDenormSingle() overwrites its
+    // argument with the single-precision bits on success -- confirm.
+    if ((Op->getValueType(0) == MVT::v2f64) &&
+        convertToNonDenormSingle(APSplatBits)) {
+      SDValue SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    }
+
+    // We may lose precision, so we have to use XXSPLTI32DX: materialize the
+    // 64-bit splat one 32-bit word at a time.
+    const uint64_t SplatBits64 = APSplatBits.getZExtValue();
+    const uint32_t Hi = static_cast<uint32_t>(SplatBits64 >> 32);
+    // Mask the low word. Masking with 0xFFFFFFFF00000000 and truncating to
+    // 32 bits would always give 0 and drop the low half of the constant.
+    const uint32_t Lo = static_cast<uint32_t>(SplatBits64 & 0xFFFFFFFFULL);
+    SDValue SplatNode;
+
+    if (!Hi || !Lo)
+      // If either word is 0, then we should generate XXLXOR to set to 0.
+      SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+    // NOTE(review): the word index is chosen by endianness here; confirm
+    // against the XXSPLTI32DX definition in the Power ISA that the index
+    // should depend on memory byte order at all.
+    if (Lo) {
+      SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI32DX, !Hi ? SDLoc(SplatNode) : dl,
+          MVT::v2i64, !Hi ? SplatNode : DAG.getUNDEF(MVT::v2i64),
+          DAG.getTargetConstant(LE ? 0 : 1, dl, MVT::i32),
+          DAG.getTargetConstant(Lo, dl, MVT::i32));
+    }
+    if (Hi) {
+      SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI32DX, SDLoc(SplatNode),
+          MVT::v2i64, SplatNode,
+          DAG.getTargetConstant(LE ? 1 : 0, SplatNode, MVT::i32),
+          DAG.getTargetConstant(Hi, SplatNode, MVT::i32));
+    }
+
+    return DAG.getBitcast(Op.getValueType(), SplatNode);
   }
 
   if (!BVNIsConstantSplat || SplatBitSize > 32) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2512,6 +2512,9 @@
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat<(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+            (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,114 +1,216 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN: --check-prefix=CHECK-NOPCREL
+; RUN: --check-prefixes=CHECK-NOPCREL-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN: FileCheck %s --check-prefixes=CHECK-NOPCREL-LE
 ; RUN: llc -verify-machineinstrs
-mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL +; RUN: FileCheck %s --check-prefixes=CHECK-NOPREFIX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK-BE define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr { -; CHECK-LABEL: testDoubleToDoubleFail: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleToDoubleFail: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1081435463 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleFail: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleFail: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 1081435463 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleToDoubleFail: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; 
CHECK-BE-NEXT: blr entry: ret <2 x double> } define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormToDouble: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormToDouble: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormToDouble: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 940259579 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDouble: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDouble: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 940259579 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormToDouble: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-BE-NEXT: blr entry: ret <2 x double> } define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr { -; CHECK-LABEL: testDoubleToDoubleNaNFail: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, 
.LCPI2_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -1 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-BE-NEXT: blr entry: ret <2 x double> } define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr { -; CHECK-LABEL: testDoubleNonRepresentableScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-NOPCREL-NEXT: lfd f1, .LCPI3_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, 
.LCPI3_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret double 3.423300e+02 } define dso_local float @testFloatDenormScalar() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NOPCREL-NEXT: lfs f1, .LCPI4_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; 
CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret float 0x380B38FB80000000 } define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormToDoubleScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-NOPCREL-NEXT: lfs f1, .LCPI5_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret double 0x380B38FB80000000 } diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -118,3 +118,25 @@ %vecins1 = shufflevector <4 x i32> , <4 x i32> %a, <4 x i32> ret <4 x 
i32> %vecins1 } + +define dso_local <2 x double> @test_xxsplti32dx_8() { +; CHECK-LABEL: test_xxsplti32dx_8 +; CHECK-LE: xxlxor vs34, vs34, vs34 +; CHECK-LE: xxsplti32dx vs34, 1, 1082660167 +; CHECK-BE: xxlxor vs34, vs34, vs34 +; CHECK-BE: xxsplti32dx vs34, 0, 1082660167 +; CHECK: blr +entry: + ret <2 x double> +} + +define dso_local <8 x i16> @test_xxsplti32dx_9() { +; CHECK-LABEL: test_xxsplti32dx_9 +; CHECK-LE: xxlxor vs34, vs34, vs34 +; CHECK-LE: xxsplti32dx vs34, 1, 23855277 +; CHECK-BE: xxlxor vs34, vs34, vs34 +; CHECK-BE: xxsplti32dx vs34, 0, 19070977 +; CHECK: blr +entry: + ret <8 x i16> +}