diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3206,12 +3206,14 @@
     SelectionDAG &DAG;
     bool LegalTys;
     bool LegalOps;
+    bool BitsRotatedForPPCF128;
     SDValue Old;
     SDValue New;
 
     explicit TargetLoweringOpt(SelectionDAG &InDAG, bool LT, bool LO) :
-      DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
+      DAG(InDAG), LegalTys(LT), LegalOps(LO),
+      BitsRotatedForPPCF128(false) {}
 
     bool LegalTypes() const { return LegalTys; }
     bool LegalOperations() const { return LegalOps; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -846,6 +846,16 @@
   // Other users may use these bits.
   EVT VT = Op.getValueType();
+
+  // On little endian, the order of ppcf128 is backwards. However, if we
+  // recursively traverse multiple nodes that produce ppcf128 results, we
+  // don't want to keep rotating the bits.
+  if (VT == MVT::ppcf128 && TLO.DAG.getDataLayout().isLittleEndian() &&
+      !TLO.BitsRotatedForPPCF128) {
+    TLO.BitsRotatedForPPCF128 = true;
+    DemandedBits = DemandedBits.rotl(64);
+  }
+
   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
     if (Depth != 0) {
       // If not at the root, Just compute the Known bits to
       // simplify things downstream.
diff --git a/llvm/test/CodeGen/PowerPC/pr45475.ll b/llvm/test/CodeGen/PowerPC/pr45475.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr45475.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+define zeroext i1 @ppc128hi(ppc_fp128 %fp) {
+; CHECK-LABEL: ppc128hi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mffprd r3, f2
+; CHECK-NEXT:    rldicl r3, r3, 1, 63
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: ppc128hi:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    mffprd r3, f1
+; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
+; CHECK-BE-NEXT:    blr
+  %v128 = bitcast ppc_fp128 %fp to i128
+  %shift = lshr i128 %v128, 64
+  %high = trunc i128 %shift to i64
+  %c = icmp slt i64 %high, 0
+  ret i1 %c
+}
+define zeroext i1 @ppc128lo(ppc_fp128 %fp) {
+; CHECK-LABEL: ppc128lo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mffprd r3, f1
+; CHECK-NEXT:    rldicl r3, r3, 1, 63
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: ppc128lo:
+; CHECK-BE:       # %bb.0:
+; CHECK-BE-NEXT:    mffprd r3, f2
+; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
+; CHECK-BE-NEXT:    blr
+  %v128 = bitcast ppc_fp128 %fp to i128
+  %low = trunc i128 %v128 to i64
+  %c = icmp slt i64 %low, 0
+  ret i1 %c
+}
+define dso_local signext i32 @testMultNodes(ppc_fp128 %a, ppc_fp128 %b, i32 signext %c, i32 signext %d) {
+; CHECK-LABEL: testMultNodes:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpw r7, r8
+; CHECK-NEXT:    bgt cr0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    fmr f4, f2
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    mffprd r3, f4
+; CHECK-NEXT:    rldicl r3, r3, 1, 63
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: testMultNodes:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    cmpw r7, r8
+; CHECK-BE-NEXT:    bgt cr0, .LBB2_2
+; CHECK-BE-NEXT:  # %bb.1: # %entry
+; CHECK-BE-NEXT:    fmr f3, f1
+; CHECK-BE-NEXT:  .LBB2_2: # %entry
+; CHECK-BE-NEXT:    mffprd r3, f3
+; CHECK-BE-NEXT:    rldicl r3, r3, 1, 63
+; CHECK-BE-NEXT:    blr
+entry:
+  %cmp = icmp sgt i32 %c, %d
+  %cond = select i1 %cmp, ppc_fp128 %b, ppc_fp128 %a
+  %0 = bitcast ppc_fp128 %cond to i128
+  %shr = lshr i128 %0, 127
+  %conv = trunc i128 %shr to i32
+  ret i32 %conv
+}
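
For reviewers, an illustration that is not part of the patch: a minimal standalone C++ sketch of what the DemandedBits.rotl(64) call above does to a 128-bit demanded-bits mask. Mask128 and rotl64 are hypothetical helpers invented for this sketch; on a 128-bit APInt, rotl(64) amounts to swapping the two 64-bit halves.

#include <cstdint>
#include <cstdio>

// A 128-bit demanded-bits mask split into two 64-bit halves.
struct Mask128 {
  uint64_t Lo; // bits 0..63 of the i128
  uint64_t Hi; // bits 64..127 of the i128
};

// Rotating a 128-bit value left by 64 swaps its halves, which is what
// APInt::rotl(64) does to a 128-bit APInt.
static Mask128 rotl64(Mask128 M) { return {M.Hi, M.Lo}; }

int main() {
  // @ppc128hi in the test demands only the sign bit of the i128 (bit 127).
  Mask128 Demanded = {0, 1ULL << 63};

  // On little-endian targets the two doubles of a ppc_fp128 are ordered
  // opposite to this bit numbering, so the patch rotates the mask once
  // (guarded by BitsRotatedForPPCF128 so recursive SimplifyDemandedBits
  // calls do not rotate it again).
  Mask128 Rotated = rotl64(Demanded);

  printf("demanded: hi=%016llx lo=%016llx\n",
         (unsigned long long)Demanded.Hi, (unsigned long long)Demanded.Lo);
  printf("rotated:  hi=%016llx lo=%016llx\n",
         (unsigned long long)Rotated.Hi, (unsigned long long)Rotated.Lo);
  return 0;
}

After the rotation the demanded bit lands in the other 64-bit half, matching how the two halves of a ppc_fp128 are ordered on little-endian targets (compare the f1/f2 differences between the CHECK and CHECK-BE lines in the test above).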