Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2933,6 +2933,10 @@
                 PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
 let Uses = [RM] in {
+  // FP exceptions may change some bits in FPSCR, but that is not modeled well.
+  // Mark MFFS as having side effects so that instructions which may raise an
+  // exception are not scheduled across a read of FPSCR.
+  let hasSideEffects = 1 in
   def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
                       "mffs $rT", IIC_IntMFFS,
                       [(set f64:$rT, (PPCmffs))]>,
@@ -4333,6 +4337,7 @@
 def : InstAlias<"mtfsfi. $BF, $U", (MTFSFI_rec crrc:$BF, i32imm:$U, 0)>;
 
 let Predicates = [HasFPU] in {
+let hasSideEffects = 1 in
 def MTFSF : XFLForm_1<63, 711, (outs),
                       (ins i32imm:$FLM, f8rc:$FRB, i32imm:$L, i32imm:$W),
                       "mtfsf $FLM, $FRB, $L, $W", IIC_IntMFFS, []>;
Index: llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -92,18 +92,18 @@
 ; CHECK-NEXT: fadd 1, 2, 1
 ; CHECK-NEXT: mtfsf 1, 0
 ; CHECK-NEXT: fctiwz 0, 1
-; CHECK-NEXT: mffs 1
 ; CHECK-NEXT: stfd 0, 160(1)
+; CHECK-NEXT: mffs 0
 ; CHECK-NEXT: mtfsb1 31
 ; CHECK-NEXT: mtfsb0 30
-; CHECK-NEXT: fadd 0, 28, 29
-; CHECK-NEXT: mtfsf 1, 1
-; CHECK-NEXT: lfs 1, .LCPI0_1@l(3)
-; CHECK-NEXT: fctiwz 0, 0
-; CHECK-NEXT: stfd 0, 152(1)
+; CHECK-NEXT: fadd 1, 28, 29
+; CHECK-NEXT: mtfsf 1, 0
+; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
+; CHECK-NEXT: fctiwz 1, 1
+; CHECK-NEXT: stfd 1, 152(1)
 ; CHECK-NEXT: fcmpu 0, 28, 27
 ; CHECK-NEXT: lwz 3, 164(1)
-; CHECK-NEXT: fcmpu 1, 29, 1
+; CHECK-NEXT: fcmpu 1, 29, 0
 ; CHECK-NEXT: lwz 4, 156(1)
 ; CHECK-NEXT: crandc 20, 6, 0
 ; CHECK-NEXT: cror 20, 5, 20
@@ -214,20 +214,20 @@
 ; CHECK-NEXT: fadd 1, 2, 1
 ; CHECK-NEXT: mtfsf 1, 0
 ; CHECK-NEXT: fctiwz 0, 1
-; CHECK-NEXT: mffs 1
 ; CHECK-NEXT: stfd 0, 32(1)
+; CHECK-NEXT: mffs 0
 ; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lfs 0, .LCPI0_2@l(3)
+; CHECK-NEXT: lfs 1, .LCPI0_2@l(3)
 ; CHECK-NEXT: lis 3, .LCPI0_3@ha
 ; CHECK-NEXT: mtfsb0 30
 ; CHECK-NEXT: fadd 2, 28, 29
-; CHECK-NEXT: mtfsf 1, 1
-; CHECK-NEXT: lfs 1, .LCPI0_3@l(3)
+; CHECK-NEXT: mtfsf 1, 0
+; CHECK-NEXT: lfs 0, .LCPI0_3@l(3)
 ; CHECK-NEXT: fctiwz 2, 2
 ; CHECK-NEXT: stfd 2, 24(1)
-; CHECK-NEXT: fcmpu 0, 30, 0
+; CHECK-NEXT: fcmpu 0, 30, 1
 ; CHECK-NEXT: lwz 3, 36(1)
-; CHECK-NEXT: fcmpu 1, 31, 1
+; CHECK-NEXT: fcmpu 1, 31, 0
 ; CHECK-NEXT: lwz 4, 28(1)
 ; CHECK-NEXT: crandc 20, 6, 1
 ; CHECK-NEXT: cror 20, 4, 20
@@ -269,20 +269,20 @@
 ; CHECK-NEXT: fadd 1, 2, 1
 ; CHECK-NEXT: mtfsf 1, 0
 ; CHECK-NEXT: fctiwz 0, 1
-; CHECK-NEXT: mffs 1
 ; CHECK-NEXT: stfd 0, 96(1)
+; CHECK-NEXT: mffs 0
 ; CHECK-NEXT: mtfsb1 31
-; CHECK-NEXT: lfs 0, .LCPI0_0@l(3)
+; CHECK-NEXT: lfs 1, .LCPI0_0@l(3)
 ; CHECK-NEXT: lis 3, .LCPI0_1@ha
 ; CHECK-NEXT: mtfsb0 30
 ; CHECK-NEXT: fadd 2, 30, 31
-; CHECK-NEXT: mtfsf 1, 1
-; CHECK-NEXT: lfs 1, .LCPI0_1@l(3)
+; CHECK-NEXT: mtfsf 1, 0
+; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
 ; CHECK-NEXT: fctiwz 2, 2
 ; CHECK-NEXT: stfd 2, 88(1)
-; CHECK-NEXT: fcmpu 0, 30, 0
+; CHECK-NEXT: fcmpu 0, 30, 1
 ; CHECK-NEXT: lwz 3, 100(1)
-; CHECK-NEXT: fcmpu 1, 31, 1
+; CHECK-NEXT: fcmpu 1, 31, 0
 ; CHECK-NEXT: lwz 4, 92(1)
 ; CHECK-NEXT: crandc 20, 6, 0
 ; CHECK-NEXT: cror 20, 5, 20
Index: llvm/test/CodeGen/PowerPC/fpscr-barrier.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/fpscr-barrier.ll
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux -debug-only=machine-scheduler \
+; RUN:   2>&1 | FileCheck %s --check-prefix=LOG
+
+; Check that moves to/from FPSCR act as barriers for FP operations. The checks
+; match -debug-only output, which only exists in assertion-enabled builds.
+define double @sched(double %a, double %b, double %c, double %d) #0 {
+entry:
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: sched:%bb.0 entry
+; LOG: SU([[M1:[0-9]+]]): %{{[0-9]+}}:vsfrc = XSMULDP %{{[0-9]+}}:vsfrc, %{{[0-9]+}}:vsfrc, implicit $rm
+; LOG: SU([[M2:[0-9]+]]): %{{[0-9]+}}:f8rc = MFFS implicit $rm
+; LOG: SU([[M1]]): Ord Latency=0 Barrier
+; LOG: SU([[M3:[0-9]+]]): %{{[0-9]+}}:vsfrc = XSMULDP %{{[0-9]+}}:vsfrc, %{{[0-9]+}}:vsfrc, implicit $rm
+; LOG: SU([[M2]]): Ord Latency=0 Barrier
+; LOG: SU([[M4:[0-9]+]]): %{{[0-9]+}}:f8rc = MFFS implicit $rm
+; LOG: SU([[M3]]): Ord Latency=0 Barrier
+; LOG: SU([[M5:[0-9]+]]): MTFSF 255, %{{[0-9]+}}:f8rc, 0, 0
+; LOG: SU([[M4]]): Ord Latency=0 Barrier
+; LOG: SU([[M6:[0-9]+]]): %{{[0-9]+}}:vsfrc = XSADDDP %{{[0-9]+}}:vsfrc, %{{[0-9]+}}:vsfrc, implicit $rm
+; LOG: SU([[M5]]): Ord Latency=0 Barrier
+; The same block is scheduled a second time; operands are now physical registers.
+; LOG: ***** MI Scheduling *****
+; LOG-NEXT: sched:%bb.0 entry
+; LOG: SU([[M1:[0-9]+]]): renamable $f{{[0-9]+}} = XSMULDP renamable $f{{[0-9]+}}, renamable $f{{[0-9]+}}, implicit $rm
+; LOG: SU([[M2:[0-9]+]]): renamable $f{{[0-9]+}} = MFFS implicit $rm
+; LOG: SU([[M1]]): Ord Latency=0 Barrier
+; LOG: SU([[M3:[0-9]+]]): renamable $f{{[0-9]+}} = XSMULDP renamable $f{{[0-9]+}}, renamable $f{{[0-9]+}}, implicit $rm
+; LOG: SU([[M2]]): Ord Latency=0 Barrier
+; LOG: SU([[M4:[0-9]+]]): renamable $f{{[0-9]+}} = MFFS implicit $rm
+; LOG: SU([[M3]]): Ord Latency=0 Barrier
+; LOG: SU([[M5:[0-9]+]]): MTFSF 255, renamable $f{{[0-9]+}}, 0, 0
+; LOG: SU([[M4]]): Ord Latency=0 Barrier
+; LOG: SU([[M6:[0-9]+]]): renamable $f{{[0-9]+}} = XSADDDP renamable $f{{[0-9]+}}, renamable $f{{[0-9]+}}, implicit $rm
+; LOG: SU([[M5]]): Ord Latency=0 Barrier
+  %prod.ab = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %mode = call i32 @llvm.flt.rounds()
+  %prod.cd = call double @llvm.experimental.constrained.fmul.f64(double %c, double %d, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  call double @llvm.ppc.setrnd(i32 %mode)
+  %sum = call double @llvm.experimental.constrained.fadd.f64(double %prod.ab, double %prod.cd, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %sum
+}
+
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare i32 @llvm.flt.rounds()
+declare double @llvm.ppc.setrnd(i32)
+
+attributes #0 = { strictfp }