diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -73,7 +73,7 @@ // let Interpretation64Bit = 1, isCodeGenOnly = 1 in { -let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in { let isReturn = 1, isPredicable = 1, Uses = [LR8, RM] in def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>, Requires<[In64BitMode]>; @@ -100,7 +100,7 @@ def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>, PPC970_Unit_BRU; -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, hasSideEffects = 0 in { let Defs = [CTR8], Uses = [CTR8] in { def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), "bdz $dst">; @@ -118,7 +118,7 @@ -let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), @@ -193,7 +193,7 @@ // FIXME: Duplicating this for the asm parser should be unnecessary, but the // previous definition must be marked as CodeGen only to prevent decoding // conflicts. -let Interpretation64Bit = 1, isAsmParserOnly = 1 in +let Interpretation64Bit = 1, isAsmParserOnly = 1, hasSideEffects = 0 in let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func), "bl $func", IIC_BrB, []>; @@ -408,6 +408,7 @@ "#TC_RETURNr8 $dst $offset", []>; +let hasSideEffects = 0 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB, @@ -425,6 +426,7 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst), "ba $dst", IIC_BrB, []>; +} } // Interpretation64Bit def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1649,7 +1649,7 @@ "#RESTORE_CRBIT", []>; } -let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in { let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB, [(retflag)]>, Requires<[In32BitMode]>; @@ -1690,7 +1690,8 @@ def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>, PPC970_Unit_BRU; -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, + hasSideEffects = 0 in { let isBarrier = 1 in { let isPredicable = 1 in def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), @@ -1782,7 +1783,8 @@ } // The unconditional BCL used by the SjLj setjmp code. -let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in { +let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7, + hasSideEffects = 0 in { let Defs = [LR], Uses = [RM] in { def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), "bcl 20, 31, $dst">; @@ -1917,7 +1919,7 @@ } -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in @@ -5059,7 +5061,7 @@ // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. -let PPC970_Unit = 7, isBranch = 1 in { +let PPC970_Unit = 7, isBranch = 1, hasSideEffects = 0 in { let Defs = [CTR], Uses = [CTR, RM] in { def gBC : BForm_3<16, 0, 0, (outs), (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst), diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir --- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir +++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir @@ -398,7 +398,6 @@ ; CHECK: [[ADD4_2:%[0-9]+]]:gprc = nsw ADD4 [[LWZU]], [[PHI5]] ; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]] ; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store (s32) into %ir.44, !tbaa !2) - ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 4, [[LWZU1]] :: (load (s32) from %ir.uglygep1112.cast, !tbaa !2) ; CHECK: BCC 76, [[CMPLWI2]], %bb.15 ; CHECK: B %bb.13 ; CHECK: bb.13 (%ir-block.60): @@ -424,6 +423,7 @@ ; CHECK: successors: %bb.9(0x7c000000), %bb.3(0x04000000) ; CHECK: [[PHI9:%[0-9]+]]:gprc = PHI [[ADDI3]], %bb.14, [[RLWINM4]], %bb.15, [[COPY14]], %bb.16 ; CHECK: [[COPY15:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]] + ; CHECK: [[LWZ:%[0-9]+]]:gprc = LWZ 4, [[LWZU1]] :: (load (s32) from %ir.uglygep1112.cast, !tbaa !2) ; CHECK: [[ADD4_4:%[0-9]+]]:gprc = nsw ADD4 [[LWZ]], [[ADD4_2]] ; CHECK: [[ADD4_5:%[0-9]+]]:gprc = nsw ADD4 [[PHI9]], [[ADD4_4]] ; CHECK: STW killed [[ADD4_5]], 4, [[COPY15]] :: (store (s32) into %ir.uglygep78.cast, !tbaa !2) diff --git a/llvm/test/CodeGen/PowerPC/sink-side-effect.ll b/llvm/test/CodeGen/PowerPC/sink-side-effect.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sink-side-effect.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le -mcpu=pwr9 -verify-machineinstrs < %s | FileCheck %s + +define double @zot(i32* %arg, float* %arg1, i16* %arg2) { +; CHECK-LABEL: zot: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: bc 12, 20, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb3 +; CHECK-NEXT: lhz 5, 0(5) +; CHECK-NEXT: rlwinm. 5, 5, 28, 30, 31 +; CHECK-NEXT: .LBB0_2: # %bb10 +; CHECK-NEXT: lfs 0, 0(4) +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: li 4, 2 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_3: # %bb17 +; CHECK-NEXT: # +; CHECK-NEXT: addi 4, 4, 1 +; CHECK-NEXT: .LBB0_4: # %bb17 +; CHECK-NEXT: # +; CHECK-NEXT: cmpw 4, 3 +; CHECK-NEXT: bge 0, .LBB0_3 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: xsmuldp 1, 1, 0 +; CHECK-NEXT: b .LBB0_3 +bb: + %tmp = load i32, i32* %arg, align 8 + br i1 undef, label %bb9, label %bb3 + +bb3: + %tmp4 = load i16, i16* %arg2, align 4 + %tmp5 = lshr i16 %tmp4, 4 + %tmp6 = and i16 %tmp5, 3 + %tmp7 = zext i16 %tmp6 to i32 + %tmp8 = icmp eq i16 %tmp6, 0 + br i1 %tmp8, label %bb9, label %bb10 + +bb9: + br label %bb10 + +bb10: + %tmp11 = phi i32 [ undef, %bb9 ], [ %tmp7, %bb3 ] + %tmp12 = icmp sgt i32 %tmp11, 1 + br label %bb13 + +bb13: + %tmp14 = load float, float* %arg1, align 4 + %tmp15 = fpext float %tmp14 to double + br label %bb16 + +bb16: + br label %bb17 + +bb17: + %tmp18 = phi i32 [ %tmp23, %bb17 ], [ 2, %bb16 ] + %tmp19 = phi double [ %tmp22, %bb17 ], [ %tmp15, %bb16 ] + %tmp20 = icmp slt i32 %tmp18, %tmp + %tmp21 = fmul fast double %tmp19, %tmp15 + %tmp22 = select i1 %tmp20, double %tmp21, double %tmp19 + %tmp23 = add nuw i32 %tmp18, 1 + br label %bb17 +} + +declare double @ham()