Index: llvm/lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -414,6 +414,14 @@ addPass(createLICMPass()); } + // LSR Pass always generates ICmpZero LSRUse for a loop trip count compare. + // PPCCTRLoops pass may transform this loop trip count compare to a hardware + // loop. + // So running the PPCCTRLoops pass before LSR lets LSR be more precise + // about generating ICmpZero. + if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) + addPass(createPPCCTRLoops()); + TargetPassConfig::addIRPasses(); } @@ -421,9 +429,6 @@ if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None) addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); - if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoops()); - return false; } Index: llvm/test/CodeGen/PowerPC/addi-licm.ll =================================================================== --- llvm/test/CodeGen/PowerPC/addi-licm.ll +++ llvm/test/CodeGen/PowerPC/addi-licm.ll @@ -18,8 +18,8 @@ ; CHECK: addi [[REG1:[0-9]+]], 1, ; CHECK: addi [[REG2:[0-9]+]], 1, ; CHECK: %for.body.i -; CHECK-DAG: lfsx {{[0-9]+}}, [[REG1]], -; CHECK-DAG: lfsx {{[0-9]+}}, [[REG2]], +; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG1]]) +; CHECK-DAG: lfs {{[0-9]+}}, 0([[REG2]]) ; CHECK: blr ; PIP-LABEL: @foo Index: llvm/test/CodeGen/PowerPC/ctrloop-le.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-le.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-le.ll @@ -2,9 +2,6 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; XFAIL: * -; SE needs improvement - ; CHECK: test_pos1_ir_sle ; CHECK: bdnz ; a < b Index: llvm/test/CodeGen/PowerPC/ctrloop-lt.ll 
=================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-lt.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-lt.ll @@ -2,9 +2,6 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; XFAIL: * -; SE needs improvement - ; CHECK: test_pos1_ir_slt ; CHECK: bdnz ; a < b @@ -35,7 +32,6 @@ ; CHECK: test_pos2_ir_slt -; FIXME: Support this loop! ; CHECK: bdnz ; a < b define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { @@ -65,7 +61,6 @@ ; CHECK: test_pos4_ir_slt -; FIXME: Support this loop! ; CHECK: bdnz ; a < b define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind { Index: llvm/test/CodeGen/PowerPC/ctrloop-ne.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-ne.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-ne.ll @@ -32,8 +32,7 @@ ; CHECK: test_pos2_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -62,8 +61,7 @@ ; CHECK: test_pos4_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -92,8 +90,7 @@ ; CHECK: test_pos8_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -122,8 +119,7 @@ ; CHECK: test_pos16_ir_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -181,8 +177,7 @@ ; CHECK: test_pos2_ri_ne -; FIXME: Support this loop! 
-; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -211,8 +206,7 @@ ; CHECK: test_pos4_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -241,8 +235,7 @@ ; CHECK: test_pos8_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -271,8 +264,7 @@ ; CHECK: test_pos16_ri_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -330,8 +322,7 @@ ; CHECK: test_pos2_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -360,8 +351,7 @@ ; CHECK: test_pos4_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -390,8 +380,7 @@ ; CHECK: test_pos8_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: @@ -420,8 +409,7 @@ ; CHECK: test_pos16_rr_ne -; FIXME: Support this loop! -; CHECK-NOT: bdnz +; CHECK: bdnz ; a < b define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind { entry: Index: llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll =================================================================== --- llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll +++ llvm/test/CodeGen/PowerPC/ctrloop-shortLoops.ll @@ -86,9 +86,12 @@ } ; Function Attrs: norecurse nounwind +; On core a2q, IssueWidth is 1. On core pwr8, IssueWidth is 8. +; a2q should use mtctr, but pwr8 should not use mtctr. 
define signext i32 @testTripCount2NonSmallLoop() { ; CHECK-LABEL: testTripCount2NonSmallLoop: -; CHECK: bge +; CHECK-A2Q: mtctr +; CHECK-PWR8-NOT: mtctr ; CHECK: blr entry: Index: llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll =================================================================== --- llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll +++ llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll @@ -13,33 +13,32 @@ define void @foo(float* nocapture %data, float %d) { ; CHECK-LABEL: foo: -; CHECK: .LBB0_1: # %vector.body -; CHECK: add 5, 3, 4 -; CHECK-NEXT: stxvx 0, 3, 4 +; CHECK: .LBB0_1: # %vector.body +; CHECK: stxv 0, -192(4) +; CHECK-NEXT: stxv 0, -176(4) +; CHECK-NEXT: stxv 0, -160(4) +; CHECK-NEXT: stxv 0, -144(4) +; CHECK-NEXT: stxv 0, -128(4) +; CHECK-NEXT: stxv 0, -112(4) +; CHECK-NEXT: stxv 0, -96(4) +; CHECK-NEXT: stxv 0, -80(4) +; CHECK-NEXT: stxv 0, -64(4) +; CHECK-NEXT: stxv 0, -48(4) +; CHECK-NEXT: stxv 0, -32(4) +; CHECK-NEXT: stxv 0, -16(4) +; CHECK-NEXT: stxv 0, 0(4) +; CHECK-NEXT: stxv 0, 16(4) +; CHECK-NEXT: stxv 0, 32(4) +; CHECK-NEXT: stxv 0, 48(4) +; CHECK-NEXT: stxv 0, 64(4) +; CHECK-NEXT: stxv 0, 80(4) +; CHECK-NEXT: stxv 0, 96(4) +; CHECK-NEXT: stxv 0, 112(4) +; CHECK-NEXT: stxv 0, 128(4) +; CHECK-NEXT: stxv 0, 144(4) +; CHECK-NEXT: stxv 0, 160(4) +; CHECK-NEXT: stxv 0, 176(4) ; CHECK-NEXT: addi 4, 4, 384 -; CHECK-NEXT: stxv 0, 16(5) -; CHECK-NEXT: stxv 0, 32(5) -; CHECK-NEXT: stxv 0, 48(5) -; CHECK-NEXT: stxv 0, 64(5) -; CHECK-NEXT: stxv 0, 80(5) -; CHECK-NEXT: stxv 0, 96(5) -; CHECK-NEXT: stxv 0, 112(5) -; CHECK-NEXT: stxv 0, 128(5) -; CHECK-NEXT: stxv 0, 144(5) -; CHECK-NEXT: stxv 0, 160(5) -; CHECK-NEXT: stxv 0, 176(5) -; CHECK-NEXT: stxv 0, 192(5) -; CHECK-NEXT: stxv 0, 208(5) -; CHECK-NEXT: stxv 0, 224(5) -; CHECK-NEXT: stxv 0, 240(5) -; CHECK-NEXT: stxv 0, 256(5) -; CHECK-NEXT: stxv 0, 272(5) -; CHECK-NEXT: stxv 0, 288(5) -; CHECK-NEXT: stxv 0, 304(5) -; CHECK-NEXT: stxv 0, 320(5) -; CHECK-NEXT: stxv 0, 336(5) -; CHECK-NEXT: stxv 0, 352(5) -; 
CHECK-NEXT: stxv 0, 368(5) ; CHECK-NEXT: bdnz .LBB0_1 entry: Index: llvm/test/CodeGen/PowerPC/stwu-sched.ll =================================================================== --- llvm/test/CodeGen/PowerPC/stwu-sched.ll +++ llvm/test/CodeGen/PowerPC/stwu-sched.ll @@ -1,9 +1,9 @@ ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s \ -; RUN: --check-prefix=CHECK-ITIN -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s -verify-machineinstrs | FileCheck %s \ -; RUN: --check-prefix=CHECK-ITIN +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -disable-ppc-ctrloops < %s -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-ITIN +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-ctrloops < %s -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=CHECK-ITIN %0 = type { i32, i32 } @@ -12,13 +12,11 @@ define void @initCombList(%0* nocapture, i32 signext) local_unnamed_addr #0 { ; CHECK-LABEL: initCombList: ; CHECK: addi 4, 4, -8 -; CHECK: stwu 5, 64(3) - +; CHECK: stwu 4, 64(3) ; CHECK-ITIN-LABEL: initCombList: ; CHECK-ITIN: stwu 5, 64(4) ; CHECK-ITIN-NEXT: addi 3, 3, -8 - %3 = zext i32 %1 to i64 br i1 undef, label %6, label %4 Index: llvm/test/CodeGen/PowerPC/unal-altivec.ll =================================================================== --- llvm/test/CodeGen/PowerPC/unal-altivec.ll +++ llvm/test/CodeGen/PowerPC/unal-altivec.ll @@ -29,15 +29,14 @@ br i1 %10, label %for.end, label %vector.body ; CHECK: @foo -; CHECK-DAG: li [[C0:[0-9]+]], 0 +; CHECK-DAG: li [[C16:[0-9]+]], 16 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: -; CHECK-DAG: lvsl [[MASK1:[0-9]+]], [[B1:[0-9]+]], [[C0]] -; CHECK-DAG: add [[B3:[0-9]+]], [[B1]], [[C0]] -; CHECK-DAG: lvx 
[[LD1:[0-9]+]], [[B1]], [[C0]] -; CHECK-DAG: lvx [[LD2:[0-9]+]], [[B3]], -; CHECK-DAG: vperm [[R1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] -; CHECK-DAG: vaddfp {{[0-9]+}}, [[R1]], [[CNST]] +; CHECK-DAG: lvx [[LD1:[0-9]+]], 0, [[C0:[0-9]+]] +; CHECK-DAG: lvx [[LD2:[0-9]+]], [[C0]], [[C16]] +; CHECK-DAG: lvsl [[MASK1:[0-9]+]], 0, [[C0]] +; CHECK-DAG: vperm [[VR1:[0-9]+]], [[LD1]], [[LD2]], [[MASK1]] +; CHECK-DAG: vaddfp {{[0-9]+}}, [[VR1]], [[CNST]] ; CHECK: blr for.end: ; preds = %vector.body Index: llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll =================================================================== --- llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll +++ llvm/test/CodeGen/PowerPC/unaligned-addressing-mode.ll @@ -75,9 +75,9 @@ define i64 @test_xaddrX4_loop(i8* %p) { ; CHECK-LABEL: test_xaddrX4_loop: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li r5, 8 +; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: addi r4, r3, -8 -; CHECK-NEXT: li r3, 8 -; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: li r5, 3 ; loop instruction number is changed from 5 to 4, so its align is changed from 5 to 4.