diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -11497,7 +11497,7 @@
     return false;
   if (LD->getChain() != Base->getChain())
     return false;
-  EVT VT = LD->getValueType(0);
+  EVT VT = LD->getMemoryVT();
   if (VT.getSizeInBits() / 8 != Bytes)
     return false;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14245,17 +14245,23 @@
   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
   SDValue FirstInput = N->getOperand(0);
   bool IsRoundOfExtLoad = false;
+  LoadSDNode *FirstLoad = nullptr;
 
   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
-    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
-    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
+    FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
+    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
   }
   // Not a build vector of (possibly fp_rounded) loads.
   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
       N->getNumOperands() == 1)
     return SDValue();
 
+  if (!IsRoundOfExtLoad)
+    FirstLoad = cast<LoadSDNode>(FirstInput);
+
+  SmallVector<LoadSDNode *, 4> InputLoads;
+  InputLoads.push_back(FirstLoad);
   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
     // If any inputs are fp_round(extload), they all must be.
     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
@@ -14268,53 +14274,55 @@
     SDValue PreviousInput =
       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
-    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
-    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
+    LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
+    LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
 
     // If any inputs are fp_round(extload), they all must be.
     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
       return SDValue();
 
-    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
+    // We only care about regular loads. The PPC-specific load intrinsics
+    // will not lead to a merge opportunity.
+    if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
       InputsAreConsecutiveLoads = false;
-    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
+    if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
       InputsAreReverseConsecutive = false;
 
     // Exit early if the loads are neither consecutive nor reverse consecutive.
     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
       return SDValue();
+    InputLoads.push_back(LD2);
   }
 
   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
          "The loads cannot be both consecutive and reverse consecutive.");
 
-  SDValue FirstLoadOp =
-    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
-  SDValue LastLoadOp =
-    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
-                       N->getOperand(N->getNumOperands()-1);
-
-  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
-  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
+  SDValue WideLoad;
+  SDValue ReturnSDVal;
 
   if (InputsAreConsecutiveLoads) {
-    assert(LD1 && "Input needs to be a LoadSDNode.");
-    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
-                       LD1->getBasePtr(), LD1->getPointerInfo(),
-                       LD1->getAlign());
-  }
-  if (InputsAreReverseConsecutive) {
-    assert(LDL && "Input needs to be a LoadSDNode.");
-    SDValue Load =
-        DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(),
-                    LDL->getPointerInfo(), LDL->getAlign());
+    assert(FirstLoad && "Input needs to be a LoadSDNode.");
+    WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
+                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+                           FirstLoad->getAlign());
+    ReturnSDVal = WideLoad;
+  } else if (InputsAreReverseConsecutive) {
+    LoadSDNode *LastLoad = InputLoads.back();
+    assert(LastLoad && "Input needs to be a LoadSDNode.");
+    WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
+                           LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
+                           LastLoad->getAlign());
     SmallVector<int, 16> Ops;
     for (int i = N->getNumOperands() - 1; i >= 0; i--)
       Ops.push_back(i);
 
-    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
-                                DAG.getUNDEF(N->getValueType(0)), Ops);
-  }
-  return SDValue();
+    ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
+                                       DAG.getUNDEF(N->getValueType(0)), Ops);
+  } else
+    return SDValue();
+
+  for (auto *LD : InputLoads)
+    DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
+
+  return ReturnSDVal;
 }
 
 // This function adds the required vector_shuffle needed to get
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/build-vector-to-ld-chain.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s
+
+%0 = type <{ %1, ptr, i32, [4 x i8] }>
+%1 = type { %2 }
+%2 = type { %3 }
+%3 = type { ptr, ptr, ptr }
+
+$testfunc = comdat any
+
+declare void @_ZdlPv() local_unnamed_addr #0
+
+define void @testfunc(i64 %arg) local_unnamed_addr #0 comdat {
+; CHECK-LABEL: testfunc:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -80(1)
+; CHECK-NEXT:    std 0, 96(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li 4, 48
+; CHECK-NEXT:    addi 3, 3, 24
+; CHECK-NEXT:    stvx 30, 1, 4 # 16-byte Folded Spill
+; CHECK-NEXT:    li 4, 64
+; CHECK-NEXT:    stvx 31, 1, 4 # 16-byte Folded Spill
+; CHECK-NEXT:    lxvd2x 63, 0, 3
+; CHECK-NEXT:    xxswapd 62, 63
+; CHECK-NEXT:    bc 12, 20, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %bb37
+; CHECK-NEXT:    bl _ZdlPv
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_2: # %bb38
+; CHECK-NEXT:    stxsiwx 62, 0, 3
+; CHECK-NEXT:    stxsdx 63, 0, 3
+; CHECK-NEXT:    li 3, 64
+; CHECK-NEXT:    lvx 31, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT:    li 3, 48
+; CHECK-NEXT:    lvx 30, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 80
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+bb:
+  %i = inttoptr i64 %arg to ptr
+  %i6 = getelementptr inbounds %0, ptr %i, i64 0, i32 1
+  %i7 = load <12 x i8>, ptr %i6, align 8
+  br i1 poison, label %bb38, label %bb37
+
+bb37:                                             ; preds = %bb
+  tail call void @_ZdlPv() #1
+  br label %bb38
+
+bb38:                                             ; preds = %bb37, %bb
+  store <12 x i8> %i7, ptr poison, align 8
+  ret void
+}
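
Summary of the behavioral change, with an illustrative sketch (the sketch is not part of the patch): combineBVOfConsecutiveLoads used to replace a BUILD_VECTOR of consecutive narrow loads with a single wide load whose output chain nothing else observed, so memory operations that were ordered after the narrow loads (in the test above, the call to @_ZdlPv, which may free the buffer) were free to be scheduled before the merged load. The makeEquivalentMemoryOrdering() calls over InputLoads splice the wide load into each narrow load's chain (in effect, a TokenFactor of the old and new output chains), restoring the ordering. The getMemoryVT() fix in areNonVolatileConsecutiveLoads matters for the fp_round(extload) case, where the memory type is narrower than the value type, so the old getValueType(0) check rejected genuinely consecutive extloads.

A hypothetical reduced case in the same spirit as the test (@merge_keeps_order and @free_it are made-up names for illustration): the two i64 loads feed a BUILD_VECTOR during selection and get merged into one wide load, which must stay chained before the possibly-freeing call.

  ; Made-up reduced example; the merged wide load must not sink past the call.
  declare void @free_it(ptr)

  define <2 x i64> @merge_keeps_order(ptr %p) {
    %a = load i64, ptr %p, align 16
    %q = getelementptr inbounds i64, ptr %p, i64 1
    %b = load i64, ptr %q, align 8
    ; May free %p, so it must execute after both loads (and after the
    ; wide load that replaces them).
    call void @free_it(ptr %p)
    %v0 = insertelement <2 x i64> poison, i64 %a, i32 0
    %v1 = insertelement <2 x i64> %v0, i64 %b, i32 1
    ret <2 x i64> %v1
  }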