PowerPC: model FPSCR reads/writes as having side effects.

Summary of the hunks below:
  * IntrinsicsPowerPC.td: int_ppc_readflm changes from [IntrNoMem] to
    [IntrNoMerge, IntrHasSideEffects]; int_ppc_setflm changes from [] to
    [IntrHasSideEffects].
  * PPCInstrInfo.td: `hasSideEffects = 1` is added to the `let Defs = [RM]`
    block holding MTFSFb, the `let Uses = [RM]` block that starts with MFFS,
    and the `let Defs = [RM]` block holding MTFSFIb.
  * read-set-flm.ll: two existing CHECK sequences now expect `mffs 1` before
    `mtfsf 255, 4`; new functions cse_nomerge and cse_nomerge_readonly expect
    one mffs on each side of the call and a third one before mtfsf.

NOTE(review): the line structure matches the hunk headers, but the leading
indentation inside the hunks was re-created in the usual LLVM layout --
confirm against the tree or apply with `git apply --ignore-whitespace`.

diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -31,10 +31,12 @@
 
   // Get content from current FPSCR register
   def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">,
-                        Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
+                        Intrinsic<[llvm_double_ty], [],
+                                  [IntrNoMerge, IntrHasSideEffects]>;
   // Set FPSCR register, and return previous content
   def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">,
-                       Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+                       Intrinsic<[llvm_double_ty], [llvm_double_ty],
+                                 [IntrHasSideEffects]>;
 
   // Intrinsics for [double]word extended forms of divide instructions
   def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3108,14 +3108,14 @@
                PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
 
-let Defs = [RM] in {
+let Defs = [RM], hasSideEffects = 1 in {
   let isCodeGenOnly = 1 in
   def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
                        "mtfsf $FM, $rT", IIC_IntMTFSB0,
                        [(int_ppc_mtfsf timm:$FM, f64:$rT)]>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
-let Uses = [RM] in {
+let Uses = [RM], hasSideEffects = 1 in {
   def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
                       "mffs $rT", IIC_IntMFFS,
                       [(set f64:$rT, (PPCmffs))]>,
@@ -4503,7 +4503,7 @@
 // All MTFSF variants may change the rounding mode so conservatively set it
 // as an implicit def for all of them.
 let Predicates = [HasFPU] in {
-let Defs = [RM] in {
+let Defs = [RM], hasSideEffects = 1 in {
 let isCodeGenOnly = 1,
     Pattern = [(int_ppc_mtfsfi timm:$BF, timm:$U)], W = 0 in
 def MTFSFIb : XLForm_4<63, 134, (outs), (ins u3imm:$BF, u4imm:$U),
diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
--- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll
+++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -11,6 +11,7 @@
 ; CHECK-NEXT:    xsdivdp 1, 1, 2
 ; CHECK-NEXT:    xsadddp 1, 1, 3
 ; CHECK-NEXT:    xsadddp 0, 1, 0
+; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    mtfsf 255, 4
 ; CHECK-NEXT:    xsdivdp 1, 3, 4
 ; CHECK-NEXT:    xsadddp 1, 1, 2
@@ -46,6 +47,7 @@
 ; CHECK-NEXT:    xsdivdp 1, 1, 2
 ; CHECK-NEXT:    xsadddp 1, 1, 3
 ; CHECK-NEXT:    xsadddp 0, 1, 0
+; CHECK-NEXT:    mffs 1
 ; CHECK-NEXT:    mtfsf 255, 4
 ; CHECK-NEXT:    xsdivdp 1, 3, 4
 ; CHECK-NEXT:    xsadddp 1, 1, 2
@@ -74,9 +76,88 @@
   ret double %7
 }
 
+define void @cse_nomerge(double* %f1, double* %f2, double %f3) #0 {
+; CHECK-LABEL: cse_nomerge:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -24
+; CHECK-NEXT:    .cfi_offset f31, -8
+; CHECK-NEXT:    std 30, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    fmr 31, 1
+; CHECK-NEXT:    mr 30, 4
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    stfd 0, 0(3)
+; CHECK-NEXT:    bl effect_func
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    stfd 0, 0(30)
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    mtfsf 255, 31
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = call double @llvm.ppc.readflm()
+  store double %0, double* %f1, align 8
+  call void @effect_func()
+  %1 = call double @llvm.ppc.readflm()
+  store double %1, double* %f2, align 8
+  %2 = call contract double @llvm.ppc.setflm(double %f3)
+  ret void
+}
+
+define void @cse_nomerge_readonly(double* %f1, double* %f2, double %f3) #0 {
+; CHECK-LABEL: cse_nomerge_readonly:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -24
+; CHECK-NEXT:    .cfi_offset f31, -8
+; CHECK-NEXT:    std 30, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -64(1)
+; CHECK-NEXT:    fmr 31, 1
+; CHECK-NEXT:    mr 30, 4
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    stfd 0, 0(3)
+; CHECK-NEXT:    bl readonly_func
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    stfd 0, 0(30)
+; CHECK-NEXT:    mffs 0
+; CHECK-NEXT:    mtfsf 255, 31
+; CHECK-NEXT:    addi 1, 1, 64
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = call double @llvm.ppc.readflm()
+  store double %0, double* %f1, align 8
+  call void @readonly_func()
+  %1 = call double @llvm.ppc.readflm()
+  store double %1, double* %f2, align 8
+  %2 = call contract double @llvm.ppc.setflm(double %f3)
+  ret void
+}
+
+declare void @effect_func()
+declare void @readonly_func() #1
 declare double @llvm.ppc.readflm()
 declare double @llvm.ppc.setflm(double)
 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 
 attributes #0 = { strictfp }
+attributes #1 = { readonly }