diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -15202,13 +15202,17 @@
       }
     }
     break;
-  case ISD::BSWAP:
+  case ISD::BSWAP: {
     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
-    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
-        N->getOperand(0).hasOneUse() &&
+    // For subtargets without LDBRX, we can still do better than the default
+    // expansion even for 64-bit BSWAP (LOAD).
+    bool is64BitBswapOn64BitTgt =
+        Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
+    bool isSingleUseNonExtLd = ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+                               N->getOperand(0).hasOneUse();
+    if (isSingleUseNonExtLd &&
         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
-         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
-          N->getValueType(0) == MVT::i64))) {
+         (Subtarget.hasLDBRX() && is64BitBswapOn64BitTgt))) {
       SDValue Load = N->getOperand(0);
       LoadSDNode *LD = cast<LoadSDNode>(Load);
       // Create the byte-swapping load.
@@ -15238,8 +15242,45 @@
 
       // Return N so it doesn't get rechecked!
       return SDValue(N, 0);
+    } else if (is64BitBswapOn64BitTgt && isSingleUseNonExtLd) {
+      // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
+      // before legalization so that the BUILD_PAIR is handled correctly.
+      if (!DCI.isBeforeLegalize())
+        return SDValue();
+      LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
+      // Splitting a volatile or atomic load into two narrower accesses would
+      // change its semantics, so leave such loads alone.
+      if (!LD->isSimple())
+        return SDValue();
+      SDValue BasePtr = LD->getBasePtr();
+      // Word at offset 0 of the original 8-byte access.
+      SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
+                               LD->getPointerInfo(), LD->getAlignment());
+      Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getIntPtrConstant(4, dl));
+      // Word at offset 4. Derive its memory operand from the original one so
+      // that the pointer info and alignment describe the bytes actually read.
+      MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
+          LD->getMemOperand(), 4, 4);
+      SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
+      Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
+      // BUILD_PAIR takes the low half first. Once the bytes are reversed, the
+      // word from the lower address is the low half on big-endian targets and
+      // the high half on little-endian targets.
+      SDValue Res;
+      if (Subtarget.isLittleEndian())
+        Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
+      else
+        Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                               Hi.getOperand(0).getValue(1),
+                               Lo.getOperand(0).getValue(1));
+      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
+      return Res;
     }
     break;
+  }
   case PPCISD::VCMP:
     // If a VCMP_rec node already exists with exactly the same operands as this
     // node, use its result instead of this node (VCMP_rec computes both a CR6
diff --git a/llvm/test/CodeGen/PowerPC/ld-bswap64-no-ldbrx.ll b/llvm/test/CodeGen/PowerPC/ld-bswap64-no-ldbrx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ld-bswap64-no-ldbrx.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64-- -mcpu=pwr5 -verify-machineinstrs < %s | FileCheck %s
+define void @bs(i64* %p) {
+; CHECK-LABEL: bs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li 4, 4
+; CHECK-NEXT:    lwbrx 5, 0, 3
+; CHECK-NEXT:    lwbrx 4, 3, 4
+; CHECK-NEXT:    rldimi 5, 4, 32, 0
+; CHECK-NEXT:    std 5, 0(3)
+; CHECK-NEXT:    blr
+  %x = load i64, i64* %p, align 8
+  %b = call i64 @llvm.bswap.i64(i64 %x)
+  store i64 %b, i64* %p, align 8
+  ret void
+}
+declare i64 @llvm.bswap.i64(i64) #2