Index: llvm/include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -842,7 +842,11 @@ /// Allow the target to override the cost of using a callee-saved register for - /// the first time. Default value of 0 means we will use a callee-saved - /// register if it is available. - virtual unsigned getCSRFirstUseCost() const { return 0; } + /// the first time. A value of 0 means we will use a callee-saved register + /// if it is available; the default implementation returns 2. + virtual unsigned getCSRFirstUseCost() const { + // Using a CSR costs a load/store pair in the prologue/epilogue, so the + // cost is 2 times a spill, like the cost calculated in SpillPlacer. + return 2; + } /// Returns true if the target requires (and can make use of) the register /// scavenger. Index: llvm/lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocGreedy.cpp +++ llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -2842,28 +2842,10 @@ } void RAGreedy::initializeCSRCost() { - // We use the larger one out of the command-line option and the value report - // by TRI. - CSRCost = BlockFrequency( - std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); - if (!CSRCost.getFrequency()) - return; - - // Raw cost is relative to Entry == 2^14; scale it appropriately. - uint64_t ActualEntry = MBFI->getEntryFreq(); - if (!ActualEntry) { - CSRCost = 0; - return; - } - uint64_t FixedEntry = 1 << 14; - if (ActualEntry < FixedEntry) - CSRCost *= BranchProbability(ActualEntry, FixedEntry); - else if (ActualEntry <= UINT32_MAX) - // Invert the fraction and divide. - CSRCost /= BranchProbability(FixedEntry, ActualEntry); - else - // Can't use BranchProbability in general, since it takes 32-bit numbers. - CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); + // Scale the cost reported by TRI by the entry frequency. + // NOTE(review): the CSRFirstTimeCost command-line option is no longer + // consulted here -- confirm that dropping it is intended. 
+ CSRCost = BlockFrequency(MBFI->getEntryFreq() * TRI->getCSRFirstUseCost()); } /// Collect the hint info for \p Reg. Index: llvm/test/CodeGen/PowerPC/csr-split.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/csr-split.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-PWR9 +; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; Check that CSR splitting works properly for the tests below. + +@a = common dso_local local_unnamed_addr global i32 0, align 4 + +; TODO: +; Once ShrinkWrap is enhanced to prevent premature stack popping when +; an indirect stack access occurs, the prologue should be moved from +; entry to # %bb.1 +define dso_local signext i32 @test1(i32* %b) local_unnamed_addr { +; CHECK-PWR9-LABEL: test1: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: mflr r0 +; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 48 +; CHECK-PWR9-NEXT: .cfi_offset lr, 16 +; CHECK-PWR9-NEXT: .cfi_offset r30, -16 +; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r0, 16(r1) +; CHECK-PWR9-NEXT: stdu r1, -48(r1) +; CHECK-PWR9-NEXT: addis r4, r2, a@toc@ha +; CHECK-PWR9-NEXT: lwa r4, a@toc@l(r4) +; CHECK-PWR9-NEXT: cmpld r4, r3 +; CHECK-PWR9-NEXT: # implicit-def: $r4 +; CHECK-PWR9-NEXT: bne cr0, .LBB0_2 +; CHECK-PWR9-NEXT: # %bb.1: # %if.then +; CHECK-PWR9-NEXT: mr r30, r3 +; CHECK-PWR9-NEXT: bl callVoid +; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: mr r3, r30 +; CHECK-PWR9-NEXT: bl callNonVoid +; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: mr r4, r3 +; CHECK-PWR9-NEXT: .LBB0_2: # %if.end +; CHECK-PWR9-NEXT: extsw r3, r4 +; CHECK-PWR9-NEXT: addi r1, r1, 48 +; 
CHECK-PWR9-NEXT: ld r0, 16(r1) +; CHECK-PWR9-NEXT: mtlr r0 +; CHECK-PWR9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: blr +; +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: addis r4, r2, a@toc@ha +; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; CHECK-NEXT: lwa r5, a@toc@l(r4) +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: cmpld r5, r3 +; CHECK-NEXT: # implicit-def: $r3 +; CHECK-NEXT: bne cr0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: bl callVoid +; CHECK-NEXT: nop +; CHECK-NEXT: mr r3, r30 +; CHECK-NEXT: bl callNonVoid +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB0_2: # %if.end +; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 128 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @a, align 4, !tbaa !2 + %conv = sext i32 %0 to i64 + %1 = inttoptr i64 %conv to i32* + %cmp = icmp eq i32* %1, %b + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %call = tail call signext i32 bitcast (i32 (...)* @callVoid to i32 ()*)() + %call2 = tail call signext i32 @callNonVoid(i32* %b) + br label %if.end + +if.end: ; preds = %if.then, %entry + %retval.0 = phi i32 [ %call2, %if.then ], [ undef, %entry ] + ret i32 %retval.0 +} + +declare signext i32 @callVoid(...) 
local_unnamed_addr + +declare signext i32 @callNonVoid(i32*) local_unnamed_addr + +define dso_local signext i32 @test2(i32* %p1) local_unnamed_addr { +; CHECK-PWR9-LABEL: test2: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: mflr r0 +; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 48 +; CHECK-PWR9-NEXT: .cfi_offset lr, 16 +; CHECK-PWR9-NEXT: .cfi_offset r30, -16 +; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r0, 16(r1) +; CHECK-PWR9-NEXT: stdu r1, -48(r1) +; CHECK-PWR9-NEXT: mr r4, r3 +; CHECK-PWR9-NEXT: li r3, 0 +; CHECK-PWR9-NEXT: cmpldi r4, 0 +; CHECK-PWR9-NEXT: beq cr0, .LBB1_3 +; CHECK-PWR9-NEXT: # %bb.1: # %if.end +; CHECK-PWR9-NEXT: addis r5, r2, a@toc@ha +; CHECK-PWR9-NEXT: lwa r5, a@toc@l(r5) +; CHECK-PWR9-NEXT: cmpld r5, r4 +; CHECK-PWR9-NEXT: bne cr0, .LBB1_3 +; CHECK-PWR9-NEXT: # %bb.2: # %if.then2 +; CHECK-PWR9-NEXT: mr r30, r4 +; CHECK-PWR9-NEXT: bl callVoid +; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: mr r3, r30 +; CHECK-PWR9-NEXT: bl callNonVoid +; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: .LBB1_3: # %return +; CHECK-PWR9-NEXT: extsw r3, r3 +; CHECK-PWR9-NEXT: addi r1, r1, 48 +; CHECK-PWR9-NEXT: ld r0, 16(r1) +; CHECK-PWR9-NEXT: mtlr r0 +; CHECK-PWR9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: blr +; +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -128(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: cmpldi r3, 0 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; CHECK-NEXT: beq cr0, .LBB1_3 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: addis r5, r2, a@toc@ha +; CHECK-NEXT: lwa r5, a@toc@l(r5) +; CHECK-NEXT: cmpld r5, r4 +; CHECK-NEXT: bne cr0, .LBB1_3 +; CHECK-NEXT: # %bb.2: # %if.then2 +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: bl callVoid +; CHECK-NEXT: nop +; CHECK-NEXT: mr r3, r30 
+; CHECK-NEXT: bl callNonVoid +; CHECK-NEXT: nop +; CHECK-NEXT: .LBB1_3: # %return +; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 128 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %tobool = icmp eq i32* %p1, null + br i1 %tobool, label %return, label %if.end + +if.end: ; preds = %entry + %0 = load i32, i32* @a, align 4, !tbaa !2 + %conv = sext i32 %0 to i64 + %1 = inttoptr i64 %conv to i32* + %cmp = icmp eq i32* %1, %p1 + br i1 %cmp, label %if.then2, label %return + +if.then2: ; preds = %if.end + %call = tail call signext i32 bitcast (i32 (...)* @callVoid to i32 ()*)() + %call3 = tail call signext i32 @callNonVoid(i32* nonnull %p1) + br label %return + +return: ; preds = %if.end, %entry, %if.then2 + %retval.0 = phi i32 [ %call3, %if.then2 ], [ 0, %entry ], [ 0, %if.end ] + ret i32 %retval.0 +} + + +define dso_local i8* @test3(i8** nocapture %p1, i8 zeroext %p2) local_unnamed_addr { +; CHECK-PWR9-LABEL: test3: +; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: mflr r0 +; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 64 +; CHECK-PWR9-NEXT: .cfi_offset lr, 16 +; CHECK-PWR9-NEXT: .cfi_offset r30, -16 +; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: std r0, 16(r1) +; CHECK-PWR9-NEXT: stdu r1, -64(r1) +; CHECK-PWR9-NEXT: mr r5, r3 +; CHECK-PWR9-NEXT: ld r3, 0(r3) +; CHECK-PWR9-NEXT: cmpldi r3, 0 +; CHECK-PWR9-NEXT: beq cr0, .LBB2_2 +; CHECK-PWR9-NEXT: # %bb.1: # %land.rhs +; CHECK-PWR9-NEXT: std r3, 40(r1) # 8-byte Folded Spill +; CHECK-PWR9-NEXT: ld r3, 40(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: clrldi r4, r4, 32 +; CHECK-PWR9-NEXT: mr r30, r5 +; CHECK-PWR9-NEXT: bl bar +; CHECK-PWR9-NEXT: nop +; CHECK-PWR9-NEXT: mr r4, r3 +; CHECK-PWR9-NEXT: ld r3, 40(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: std r4, 0(r30) +; CHECK-PWR9-NEXT: .LBB2_2: # %land.end +; CHECK-PWR9-NEXT: addi r1, r1, 64 +; CHECK-PWR9-NEXT: ld r0, 16(r1) +; CHECK-PWR9-NEXT: 
mtlr r0 +; CHECK-PWR9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-PWR9-NEXT: blr +; +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -144(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: mr r5, r3 +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: std r30, 128(r1) # 8-byte Folded Spill +; CHECK-NEXT: cmpldi r3, 0 +; CHECK-NEXT: beq cr0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %land.rhs +; CHECK-NEXT: std r3, 120(r1) # 8-byte Folded Spill +; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: mr r30, r5 +; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: bl bar +; CHECK-NEXT: nop +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: ld r3, 120(r1) # 8-byte Folded Reload +; CHECK-NEXT: std r4, 0(r30) +; CHECK-NEXT: .LBB2_2: # %land.end +; CHECK-NEXT: ld r30, 128(r1) # 8-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 144 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load i8*, i8** %p1, align 8, !tbaa !6 + %tobool = icmp eq i8* %0, null + br i1 %tobool, label %land.end, label %land.rhs + +land.rhs: ; preds = %entry + %call = tail call i8* @bar(i8* nonnull %0, i8 zeroext %p2) + store i8* %call, i8** %p1, align 8, !tbaa !6 + br label %land.end + +land.end: ; preds = %entry, %land.rhs + ret i8* %0 +} + +declare i8* @bar(i8*, i8 zeroext) local_unnamed_addr + + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (trunk 367381) (llvm/trunk 367388)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !4, i64 0}