Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9186,21 +9186,55 @@
   bool BVNIsConstantSplat =
       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
   // double. This is to exploit the XXSPLTIDP instruction.
-  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
-      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
-      convertToNonDenormSingle(APSplatBits)) {
-    SDValue SplatNode = DAG.getNode(
-        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
-        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
-    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  // If the value cannot be represented that way, build it from its two 32-bit
+  // halves with XXSPLTI32DX instead.
+  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+      Subtarget.hasPrefixInstrs()) {
+    // Check the type first so convertToNonDenormSingle() only runs for v2f64
+    // splats.
+    // NOTE(review): it takes APSplatBits by reference and presumably rewrites
+    // it on success, which would corrupt the bits used by the fallback below -
+    // confirm.
+    if ((Op->getValueType(0) == MVT::v2f64) &&
+        convertToNonDenormSingle(APSplatBits)) {
+      SDValue SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    }
+
+    // Otherwise splat each non-zero 32-bit half with XXSPLTI32DX.
+    const uint64_t SplatBits = APSplatBits.getZExtValue();
+    const uint32_t Hi = static_cast<uint32_t>(SplatBits >> 32);
+    const uint32_t Lo = static_cast<uint32_t>(SplatBits);
+    const bool IsLE = Subtarget.isLittleEndian();
+
+    // If either half is zero, start from an all-zero vector so that half is
+    // defined without its own XXSPLTI32DX; otherwise both halves get written
+    // below and the starting value is irrelevant.
+    SDValue SplatNode = (Hi && Lo) ? DAG.getUNDEF(MVT::v2i64)
+                                   : DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+    // Each XXSPLTI32DX takes the previous node as its input vector so the
+    // half it does not write is carried through.
+    if (Lo)
+      SplatNode = DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+                              DAG.getTargetConstant(IsLE ? 0 : 1, dl, MVT::i32),
+                              DAG.getTargetConstant(Lo, dl, MVT::i32));
+    if (Hi)
+      SplatNode = DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+                              DAG.getTargetConstant(IsLE ? 1 : 0, dl, MVT::i32),
+                              DAG.getTargetConstant(Hi, dl, MVT::i32));
+
+    return DAG.getBitcast(Op.getValueType(), SplatNode);
   }
 
   if (!BVNIsConstantSplat || SplatBitSize > 32) {
 
     bool IsPermutedLoad = false;
     const SDValue *InputLoad =
         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2184,6 +2184,9 @@
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat<(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+            (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,27 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN: --check-prefix=CHECK-NOPCREL
+; RUN: --check-prefixes=CHECK-NOPCREL-BE,CHECK-NOPCREL
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s 
| \ -; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL +; RUN: FileCheck %s --check-prefixes=CHECK-NOPCREL-LE,CHECK-NOPCREL ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr { ; CHECK-LABEL: testDoubleToDoubleFail: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1 +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1081435463 +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 ; CHECK-NEXT: blr ; ; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail: ; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPCREL-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 1081435463 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 ; CHECK-NOPCREL-NEXT: blr entry: @@ -31,14 +33,16 @@ define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr { ; CHECK-LABEL: testFloatDenormToDouble: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1 +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 940259579 +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 940259579 ; CHECK-NEXT: blr ; ; CHECK-NOPCREL-LABEL: testFloatDenormToDouble: ; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPCREL-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 940259579 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 940259579 ; CHECK-NOPCREL-NEXT: blr entry: @@ -48,14 +52,16 @@ define dso_local <2 x double> @testDoubleToDoubleNaNFail() 
local_unnamed_addr { ; CHECK-LABEL: testDoubleToDoubleNaNFail: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1 +; CHECK-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1 +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1 ; CHECK-NEXT: blr ; ; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail: ; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPCREL-NEXT: xxlxor vs34, vs34, vs34 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -1 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, -1 ; CHECK-NOPCREL-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/p10-splatImm32.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -118,3 +118,25 @@ %vecins1 = shufflevector <4 x i32> , <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins1 } + +define dso_local <2 x double> @test_xxsplti32dx_8() { +; CHECK-LABEL: test_xxsplti32dx_8 +; CHECK-LE: xxlxor vs34, vs34, vs34 +; CHECK-LE: xxsplti32dx vs34, 1, 1082660167 +; CHECK-BE: xxlxor vs34, vs34, vs34 +; CHECK-BE: xxsplti32dx vs34, 0, 1082660167 +; CHECK: blr +entry: + ret <2 x double> +} + +define dso_local <8 x i16> @test_xxsplti32dx_9() { +; CHECK-LABEL: test_xxsplti32dx_9 +; CHECK-LE: xxlxor vs34, vs34, vs34 +; CHECK-LE: xxsplti32dx vs34, 1, 23855277 +; CHECK-BE: xxlxor vs34, vs34, vs34 +; CHECK-BE: xxsplti32dx vs34, 0, 19070977 +; CHECK: blr +entry: + ret <8 x i16> +}