diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -943,7 +943,7 @@ [(set i64:$rD, (sextloadi32 xaddrX4:$src))]>, isPPC64, PPC970_DGroup_Cracked; // For fast-isel: -let isCodeGenOnly = 1, mayLoad = 1 in { +let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in { def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src), "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64, PPC970_DGroup_Cracked; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -2039,6 +2039,7 @@ } // Load Multiple +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src), "lmw $rD, $src", IIC_LdStLMW, []>; @@ -2193,6 +2194,7 @@ } // Store Multiple +let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst), "stmw $rS, $dst", IIC_LdStLMW, []>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1379,7 +1379,7 @@ [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; // VSX scalar loads introduced in ISA 2.07 - let mayLoad = 1, mayStore = 0 in { + let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { let CodeSize = 3 in def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src), "lxsspx $XT, $src", IIC_LdStLFD, []>; @@ -1404,7 +1404,7 @@ } // mayLoad // VSX scalar stores introduced in ISA 2.07 - let mayStore = 1, mayLoad = 0 in { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { let CodeSize = 3 in def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), "stxsspx $XT, $dst", IIC_LdStSTFD, []>; @@ -2977,7 +2977,7 @@ // When adding new D-Form 
loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. - let mayLoad = 1, mayStore = 0 in { + let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), "lxv $XT, $src", IIC_LdStLFD, []>; @@ -3022,7 +3022,7 @@ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. - let mayStore = 1, mayLoad = 0 in { + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), "stxv $XT, $dst", IIC_LdStSTFD, []>; @@ -3769,7 +3769,7 @@ } } -let Predicates = [HasP9Vector] in { +let Predicates = [HasP9Vector], hasSideEffects = 0 in { let mayStore = 1 in { def SPILLTOVSR_STX : PseudoXFormMemOp<(outs), (ins spilltovsrrc:$XT, memrr:$dst), diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll --- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -794,8 +794,6 @@ ; ; CHECK-P9-LABEL: test_13_consecutive_stores_of_bytes: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4 -; CHECK-P9-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 12 ; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: stxsibx vs35, r5, r3 @@ -808,6 +806,8 @@ ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 2 ; CHECK-P9-NEXT: li r3, 4 ; CHECK-P9-NEXT: stxsibx vs35, r5, r3 +; CHECK-P9-NEXT: vsldoi v3, v2, v2, 4 +; CHECK-P9-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-NEXT: vsldoi v3, v2, v2, 8 ; CHECK-P9-NEXT: li r3, 5 ; CHECK-P9-NEXT: stxsibx vs35, r5, r3 @@ -836,19 +836,19 @@ ; ; CHECK-P9-BE-LABEL: test_13_consecutive_stores_of_bytes: ; CHECK-P9-BE: # %bb.0: # %entry -; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13 -; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5 +; CHECK-P9-BE-NEXT: li r3, 3 ; 
CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 5 +; CHECK-P9-BE-NEXT: stxsibx vs34, r5, r3 ; CHECK-P9-BE-NEXT: li r3, 1 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 2 ; CHECK-P9-BE-NEXT: li r3, 2 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 -; CHECK-P9-BE-NEXT: li r3, 3 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 15 -; CHECK-P9-BE-NEXT: stxsibx vs34, r5, r3 ; CHECK-P9-BE-NEXT: li r3, 4 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 +; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 13 +; CHECK-P9-BE-NEXT: stxsibx vs35, 0, r5 ; CHECK-P9-BE-NEXT: vsldoi v3, v2, v2, 9 ; CHECK-P9-BE-NEXT: li r3, 5 ; CHECK-P9-BE-NEXT: stxsibx vs35, r5, r3 diff --git a/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll b/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll --- a/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll +++ b/llvm/test/CodeGen/PowerPC/scheduling-mem-dependency.ll @@ -1,19 +1,58 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK-P9 define i64 @store_disjoint_memory(i64* nocapture %P, i64 %v) { entry: ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: store_disjoint_memory:%bb.0 -; CHECK:SU(2): STD renamable $x4, 24, renamable $x5 :: (store 8 into %ir.arrayidx) +; CHECK:SU([[REG2:[0-9]+]]): STD renamable $x{{[0-9]+}}, 24, renamable $x[[REG5:[0-9]+]] ; CHECK-NOT: Successors: -; CHECK-NOT: SU(3): Ord Latency=0 Memory -; CHECK:SU(3): STD renamable $x4, 16, renamable $x5 :: (store 8 into %ir.arrayidx1) +; CHECK-NOT: SU([[REG3]]): Ord Latency=0 Memory +; CHECK:SU([[REG3:[0-9]+]]): STD renamable $x{{[0-9]+}}, 16, renamable $x[[REG5]] ; CHECK: Predecessors: -; CHECK-NOT: SU(2): Ord Latency=0 Memory +; CHECK-NOT: SU([[REG2]]): Ord Latency=0 Memory %arrayidx = getelementptr 
inbounds i64, i64* %P, i64 3 store i64 %v, i64* %arrayidx %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 store i64 %v, i64* %arrayidx1 ret i64 %v } + +; LXSD loads are expected to be modeled without side effects, so the scheduler must not start a new barrier chain for them. +@gd = external local_unnamed_addr global [500 x double], align 8 +@gf = external local_unnamed_addr global [500 x float], align 4 + +define double @test_lxsd_no_barrier(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) { +entry: + %0 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 10), align 8 + %1 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 17), align 8 + %2 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 87), align 8 + %3 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 97), align 8 + %4 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 77), align 8 + %add = fadd double %a, %b + %add1 = fadd double %add, %c + %add2 = fadd double %add1, %d + %add3 = fadd double %add2, %e + %add4 = fadd double %add3, %f + %add5 = fadd double %add4, %g + %add6 = fadd double %add5, %h + %add7 = fadd double %add6, %i + %add8 = fadd double %add7, %j + %add9 = fadd double %add8, %k + %add10 = fadd double %add9, %l + %add11 = fadd double %add10, %m + %add12 = fadd double %add11, %0 + %add13 = fadd double %add12, %1 + %add14 = fadd double %add13, %2 + %add15 = fadd double %add14, %3 + %add16 = fadd double %add15, %4 + ret double %add16 +; CHECK-P9: ********** MI Scheduling ********** +; CHECK-P9-LABEL: test_lxsd_no_barrier:%bb.0 entry +; CHECK-P9-NOT:Global memory object and new barrier chain: SU({{[0-9]+}}).
+; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 136 +; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 696 +; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 776 +; CHECK-P9:SU({{[0-9]+}}): renamable $vf{{[0-9]+}} = LXSD 616 +}