diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2457,7 +2457,8 @@ } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, - bool NeedsWinCFI, bool IsFirst) { + bool NeedsWinCFI, bool IsFirst, + const TargetRegisterInfo *TRI) { // If we are generating register pairs for a Windows function that requires // EH support, then pair consecutive registers only. There are no unwind // opcodes for saves/restores of non-consectuve register pairs. @@ -2469,7 +2470,7 @@ return true; if (!NeedsWinCFI) return false; - if (Reg2 == Reg1 + 1) + if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1) return false; // If pairing a GPR with LR, the pair can be described by the save_lrpair // opcode. If this is the first register pair, it would end up with a @@ -2488,9 +2489,11 @@ /// the frame-record. This means any other register pairing with LR is invalid. static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, bool UsesWinAAPCS, bool NeedsWinCFI, - bool NeedsFrameRecord, bool IsFirst) { + bool NeedsFrameRecord, bool IsFirst, + const TargetRegisterInfo *TRI) { if (UsesWinAAPCS) - return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst); + return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst, + TRI); // If we need to store the frame record, don't pair any register // with LR other than FP. @@ -2595,13 +2598,14 @@ case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, - NeedsWinCFI, NeedsFrameRecord, IsFirst)) + NeedsWinCFI, NeedsFrameRecord, IsFirst, + TRI)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: if (AArch64::FPR64RegClass.contains(NextReg) && !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, - IsFirst)) + IsFirst, TRI)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR128: diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4683,7 +4683,7 @@ // Floating point to integer conversion //--- -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, RegisterClass srcType, RegisterClass dstType, string asm, list pattern> @@ -4703,7 +4703,7 @@ let Inst{4-0} = Rd; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, RegisterClass srcType, RegisterClass dstType, Operand immType, string asm, list pattern> @@ -4825,7 +4825,7 @@ // Integer to floating point conversion //--- -let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseIntegerToFP pattern> @@ -4843,7 +4843,7 @@ let Inst{4-0} = Rd; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BaseIntegerToFPUnscaled @@ -5080,7 +5080,7 @@ // Floating point conversion //--- -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BaseFPConversion type, bits<2> opcode, RegisterClass dstType, RegisterClass srcType, string asm, list pattern> : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, @@ -5145,7 +5145,7 @@ multiclass SingleOperandFPData opcode, string asm, SDPatternOperator node = null_frag, int fpexceptions = 1> { - let mayRaiseFPException = fpexceptions in { + let mayRaiseFPException = fpexceptions, Uses = !if(fpexceptions,[FPCR],[]) in { def Hr : BaseSingleOperandFPData<{0b00,opcode}, FPR16, f16, asm, node> { let Inst{23-22} = 0b11; // 16-bit size flag let Predicates = [HasFullFP16]; @@ -5165,7 +5165,7 @@ SDPatternOperator node = null_frag> : SingleOperandFPData; -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SingleOperandFPNo16 opcode, string asm, SDPatternOperator node = null_frag>{ @@ -5186,7 +5186,7 @@ // Two operand floating point data processing //--- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseTwoOperandFPData opcode, RegisterClass regtype, string asm, list pat> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), @@ -5250,7 +5250,7 @@ // Three operand floating point data processing //--- -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BaseThreeOperandFPData pat> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), @@ -5295,7 +5295,7 @@ // Floating point data comparisons //--- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseOneOperandFPComparison pat> @@ -5314,7 +5314,7 @@ let PostEncoderMethod = "fixOneOperandFPComparison"; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseTwoOperandFPComparison pat> : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, @@ -5371,7 +5371,7 @@ // Floating point conditional comparisons //--- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseFPCondComparison pat> : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), @@ -5697,7 +5697,7 @@ } // As above, but only floating point elements supported. -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDThreeSameVectorFP opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -5719,7 +5719,7 @@ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDThreeSameVectorFPCmp opc, string asm, SDPatternOperator OpNode> { @@ -5742,7 +5742,7 @@ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDThreeSameVectorFPTied opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -5770,7 +5770,7 @@ } // As above, but D and B sized elements unsupported. -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDThreeSameVectorHS opc, string asm, SDPatternOperator OpNode> { def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, @@ -5875,7 +5875,7 @@ // ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions // select inputs from 4H vectors and accumulate outputs to a 2S vector (or from // 8H to 4S, when Q=1). -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDThreeSameVectorFML size, string asm, string kind1, string kind2, RegisterOperand RegType, ValueType AccumType, ValueType InputType, @@ -6146,7 +6146,7 @@ multiclass SIMDTwoVectorFP opc, string asm, SDPatternOperator OpNode, int fpexceptions = 1> { - let mayRaiseFPException = fpexceptions in { + let mayRaiseFPException = fpexceptions, Uses = !if(fpexceptions,[FPCR],[]) in { let Predicates = [HasNEON, HasFullFP16] in { def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, asm, ".4h", ".4h", @@ -6172,7 +6172,7 @@ : SIMDTwoVectorFP; // Supports only S and D element sizes -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDTwoVectorSD opc, string asm, SDPatternOperator OpNode = null_frag> { @@ -6202,7 +6202,7 @@ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDTwoVectorFPToInt opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -6224,7 +6224,7 @@ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDTwoVectorIntToFP opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -6376,7 +6376,7 @@ multiclass SIMDFPCmpTwoVector opc, string asm, SDNode OpNode> { - let mayRaiseFPException = 1 in { + let mayRaiseFPException = 1, Uses = [FPCR] in { let Predicates = [HasNEON, HasFullFP16] in { def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, asm, ".4h", "0.0", @@ -6422,7 +6422,7 @@ (!cast(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDFPCvtTwoVector size, bits<5> opcode, RegisterOperand outtype, RegisterOperand intype, string asm, string VdTy, string VnTy, @@ -6444,7 +6444,7 @@ let Inst{4-0} = Rd; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDFPCvtTwoVectorTied size, bits<5> opcode, RegisterOperand outtype, RegisterOperand intype, string asm, string VdTy, string VnTy, @@ -7052,7 +7052,7 @@ multiclass SIMDFPThreeScalar opc, string asm, SDPatternOperator OpNode = null_frag, Predicate pred = HasNEON> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in { let Predicates = [pred] in { def NAME#64 : BaseSIMDThreeScalar; @@ -7071,7 +7071,7 @@ multiclass SIMDThreeScalarFPCmp opc, string asm, SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in { def NAME#64 : BaseSIMDThreeScalar; def NAME#32 : BaseSIMDThreeScalar opcode, string asm> : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, @@ -7225,7 +7225,7 @@ multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { - let mayRaiseFPException = 1 in { + let mayRaiseFPException = 1, Uses = [FPCR] in { def v1i64rz : BaseSIMDCmpTwoScalar; def v1i32rz : BaseSIMDCmpTwoScalar; let Predicates = [HasNEON, HasFullFP16] in { @@ -7255,7 +7255,7 @@ (!cast(NAME # "v1i64") FPR64:$Rn)>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPTwoScalar opc, string asm, Predicate pred = HasNEON> { let Predicates = [pred] in { @@ -7267,7 +7267,7 @@ } } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDTwoScalar; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPPairwiseScalar opc, string asm> { let Predicates = [HasNEON, HasFullFP16] in { def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, @@ -7414,7 +7414,7 @@ asm, ".4s", []>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, Intrinsic intOp> { let Predicates = [HasNEON, HasFullFP16] in { @@ -8231,7 +8231,7 @@ ".2h", V128, v4f32, v8bf16>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class SIMDBF16MLAL : BaseSIMDThreeSameVectorTied : I<(outs V128:$dst), (ins V128:$Rd, V128:$Rn, V128_lo:$Rm, VectorIndexH:$idx), asm, @@ -8280,21 +8280,21 @@ ", $Rm", ".8h", "}"); } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class SIMD_BFCVTN : BaseSIMDMixedTwoVector<0, 0, 0b10, 0b10110, V128, V128, "bfcvtn", ".4h", ".4s", [(set (v8bf16 V128:$Rd), (int_aarch64_neon_bfcvtn (v4f32 V128:$Rn)))]>; -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class SIMD_BFCVTN2 : BaseSIMDMixedTwoVectorTied<1, 0, 0b10, 0b10110, V128, V128, "bfcvtn2", ".8h", ".4s", [(set (v8bf16 V128:$dst), (int_aarch64_neon_bfcvtn2 (v8bf16 V128:$Rd), (v4f32 V128:$Rn)))]>; -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BF16ToSinglePrecision : I<(outs FPR16:$Rd), (ins FPR32:$Rn), asm, "\t$Rd, $Rn", "", [(set (bf16 FPR16:$Rd), (int_aarch64_neon_bfcvt (f32 FPR32:$Rn)))]>, @@ -8348,7 +8348,7 @@ } // ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDThreeSameVectorFMLIndex opc, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, @@ -8376,7 +8376,7 @@ V128, v4f32, v8f16, OpNode>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPIndexed opc, string asm, SDPatternOperator OpNode> { let Predicates = [HasNEON, HasFullFP16] in { @@ -8559,7 +8559,7 @@ V128:$Rm, VectorIndexD:$idx)>; } -let mayRaiseFPException = 1 in +let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPIndexedTied opc, string asm> { let Predicates = [HasNEON, HasFullFP16] in { def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, @@ -10863,7 +10863,7 @@ let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd">; let PrintMethod = "printComplexRotationOp<180, 90>"; } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDThreeSameVectorComplex size, bits<3> opcode, RegisterOperand regtype, Operand rottype, string asm, string kind, list pattern> @@ -10934,7 +10934,7 @@ } } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDThreeSameVectorTiedComplex size, bits<3> opcode, RegisterOperand regtype, @@ -11006,7 +11006,7 @@ } } -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in class BaseSIMDIndexedTiedComplex size, bit opc1, bit opc2, RegisterOperand dst_reg, RegisterOperand lhs_reg, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1506,8 +1506,16 @@ def : Pat<(readcyclecounter), (MRS 0xdce8)>; // FPCR register -def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>; -def : Pat<(int_aarch64_set_fpcr i64:$val), (MSR 0xda20, GPR64:$val)>; +let Uses = [FPCR] in +def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), + [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, + PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, + Sched<[WriteSys]>; +let Defs = [FPCR] in +def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), + [(int_aarch64_set_fpcr i64:$val)]>, + PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, + Sched<[WriteSys]>; // Generic system instructions def SYSxt : SystemXtI<0, "sys">; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -424,6 +424,8 @@ Reserved.set(*SubReg); } + markSuperRegs(Reserved, AArch64::FPCR); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -146,6 +146,9 @@ // Purely virtual Vector Granule (VG) Dwarf register def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>; +// Floating-point control register +def FPCR : AArch64Reg<0, "fpcr">; + // GPR register classes with the intersections of GPR32/GPR32sp and // GPR64/GPR64sp for use by the coalescer. def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir @@ -26,12 +26,12 @@ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[DEF]] ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 872415232 ; CHECK: [[COPY4:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]] - ; CHECK: FCMPSrr [[COPY3]], [[COPY4]], implicit-def $nzcv + ; CHECK: FCMPSrr [[COPY3]], [[COPY4]], implicit-def $nzcv, implicit $fpcr ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv ; CHECK: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY1]], 1, 0 ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWri]], %subreg.sub_32 ; CHECK: [[COPY5:%[0-9]+]]:fpr32 = COPY [[DEF]] - ; CHECK: FCMPSrr [[COPY5]], [[COPY4]], implicit-def $nzcv + ; CHECK: FCMPSrr [[COPY5]], [[COPY4]], implicit-def $nzcv, implicit $fpcr ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[CSINCWr]], [[CSINCWr1]] ; CHECK: TBNZW [[EORWrr]], 0, %bb.2 @@ -41,9 +41,9 @@ ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 59 ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load (s32)) ; CHECK: [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]] - ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]] + ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]], implicit $fpcr ; CHECK: [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]] - ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]] + ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]], implicit $fpcr ; CHECK: STRSui [[FADDSrr]], [[COPY2]], 0 :: (store (s32)) ; CHECK: bb.2: ; CHECK: RET_ReallyLR @@ -57,12 +57,12 @@ %29:fpr32 = COPY %3 %33:gpr32 = MOVi32imm 872415232 %4:fpr32 = COPY %33 - FCMPSrr %29, %4, implicit-def $nzcv + FCMPSrr %29, %4, implicit-def $nzcv, implicit $fpcr %28:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv %7:gpr32 = SUBSWri %1, 1, 0, implicit-def $nzcv %8:gpr64 = SUBREG_TO_REG 0, %7, %subreg.sub_32 %30:fpr32 = COPY %3 - FCMPSrr %30, %4, implicit-def $nzcv + FCMPSrr %30, %4, implicit-def $nzcv, implicit $fpcr %27:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv %26:gpr32 = EORWrr %28, %27 TBNZW %26, 0, %bb.3 @@ -72,9 +72,9 @@ %12:gpr64 = UBFMXri %8, 60, 59 %15:fpr32 = LDRSroX %0, %12, 0, 0 :: (load (s32)) %31:fpr32 = COPY %3 - %16:fpr32 = FMULSrr %31, %15 + %16:fpr32 = FMULSrr %31, %15, implicit $fpcr %32:fpr32 = COPY %3 - %17:fpr32 = FADDSrr %16, %32 + %17:fpr32 = FADDSrr %16, %32, implicit $fpcr STRSui %17, %2, 0 :: (store (s32)) bb.3: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir @@ -21,9 +21,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv @@ -34,9 +34,9 @@ %3:fpr32 = COPY $s0 %4:fpr32 = COPY $s1 %26:gpr32 = COPY $wzr - FCMPSrr %3, %4, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr %14:gpr32common = UBFMWri %12, 1, 31 %60:gpr32 = MOVi32imm 1 %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv @@ -65,9 +65,9 @@ ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 ; CHECK: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1 ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv @@ -78,9 +78,9 @@ %3:fpr64 = COPY $d0 %4:fpr64 = COPY $d1 %26:gpr32 = COPY $wzr - FCMPDrr %3, %4, implicit-def $nzcv + FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPDrr %3, %4, implicit-def $nzcv + FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr %14:gpr32common = UBFMWri %12, 1, 31 %60:gpr32 = MOVi32imm 1 %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv @@ -109,11 +109,11 @@ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr ; CHECK: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv @@ -124,11 +124,11 @@ %3:fpr32 = COPY $s0 %4:fpr32 = COPY $s1 %26:gpr32 = COPY $wzr - FCMPSrr %3, %4, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr %14:gpr32common = UBFMWri %12, 1, 31 %60:gpr32 = MOVi32imm 1 %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv @@ -158,7 +158,7 @@ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY4]], implicit-def dead $nzcv - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv @@ -171,7 +171,7 @@ %4:fpr32 = COPY $s1 %26:gpr32 = COPY $wzr %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr %14:gpr32common = UBFMWri %12, 1, 31 %60:gpr32 = MOVi32imm 1 %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-frint-nofp16.mir @@ -17,9 +17,9 @@ ; CHECK: liveins: $h0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h0 - ; CHECK-NEXT: %1:fpr32 = nofpexcept FCVTSHr [[COPY]] - ; CHECK-NEXT: %2:fpr32 = nofpexcept FRINTXSr %1 - ; CHECK-NEXT: %3:fpr16 = nofpexcept FCVTHSr %2 + ; CHECK-NEXT: %1:fpr32 = nofpexcept FCVTSHr [[COPY]], implicit $fpcr + ; CHECK-NEXT: %2:fpr32 = nofpexcept FRINTXSr %1, implicit $fpcr + ; CHECK-NEXT: %3:fpr16 = nofpexcept FCVTHSr %2, implicit $fpcr ; CHECK-NEXT: $h0 = COPY %3 ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:fpr(s16) = COPY $h0 @@ -55,18 +55,18 @@ ; CHECK-NEXT: [[DUPi16_:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG]], 1 ; CHECK-NEXT: [[DUPi16_1:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG1]], 2 ; CHECK-NEXT: [[DUPi16_2:%[0-9]+]]:fpr16 = DUPi16 [[INSERT_SUBREG2]], 3 - ; CHECK-NEXT: %5:fpr32 = nofpexcept FCVTSHr [[COPY1]] - ; CHECK-NEXT: %6:fpr32 = nofpexcept FRINTXSr %5 - ; CHECK-NEXT: %7:fpr16 = nofpexcept FCVTHSr %6 - ; CHECK-NEXT: %8:fpr32 = nofpexcept FCVTSHr [[DUPi16_]] - ; CHECK-NEXT: %9:fpr32 = nofpexcept FRINTXSr %8 - ; CHECK-NEXT: %10:fpr16 = nofpexcept FCVTHSr %9 - ; CHECK-NEXT: %11:fpr32 = nofpexcept FCVTSHr [[DUPi16_1]] - ; CHECK-NEXT: %12:fpr32 = nofpexcept FRINTXSr %11 - ; CHECK-NEXT: %13:fpr16 = nofpexcept FCVTHSr %12 - ; CHECK-NEXT: %14:fpr32 = nofpexcept FCVTSHr [[DUPi16_2]] - ; CHECK-NEXT: %15:fpr32 = nofpexcept FRINTXSr %14 - ; CHECK-NEXT: %16:fpr16 = nofpexcept FCVTHSr %15 + ; CHECK-NEXT: %5:fpr32 = nofpexcept FCVTSHr [[COPY1]], implicit $fpcr + ; CHECK-NEXT: %6:fpr32 = nofpexcept FRINTXSr %5, implicit $fpcr + ; CHECK-NEXT: %7:fpr16 = nofpexcept FCVTHSr %6, implicit $fpcr + ; CHECK-NEXT: %8:fpr32 = nofpexcept FCVTSHr [[DUPi16_]], implicit $fpcr + ; CHECK-NEXT: %9:fpr32 = nofpexcept FRINTXSr %8, implicit $fpcr + ; CHECK-NEXT: %10:fpr16 = nofpexcept FCVTHSr %9, implicit $fpcr + ; CHECK-NEXT: %11:fpr32 = nofpexcept FCVTSHr [[DUPi16_1]], implicit $fpcr + ; CHECK-NEXT: %12:fpr32 = nofpexcept FRINTXSr %11, implicit $fpcr + ; CHECK-NEXT: %13:fpr16 = nofpexcept FCVTHSr %12, implicit $fpcr + ; CHECK-NEXT: %14:fpr32 = nofpexcept FCVTSHr [[DUPi16_2]], implicit $fpcr + ; CHECK-NEXT: %15:fpr32 = nofpexcept FRINTXSr %14, implicit $fpcr + ; CHECK-NEXT: %16:fpr16 = nofpexcept FCVTHSr %15, implicit $fpcr ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], %7, %subreg.hsub ; CHECK-NEXT: [[DEF4:%[0-9]+]]:fpr128 = IMPLICIT_DEF @@ -123,30 +123,30 @@ ; CHECK-NEXT: [[DUPi16_4:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 5 ; CHECK-NEXT: [[DUPi16_5:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 6 ; CHECK-NEXT: [[DUPi16_6:%[0-9]+]]:fpr16 = DUPi16 [[COPY]], 7 - ; CHECK-NEXT: %9:fpr32 = nofpexcept FCVTSHr [[COPY1]] - ; CHECK-NEXT: %10:fpr32 = nofpexcept FRINTXSr %9 - ; CHECK-NEXT: %11:fpr16 = nofpexcept FCVTHSr %10 - ; CHECK-NEXT: %12:fpr32 = nofpexcept FCVTSHr [[DUPi16_]] - ; CHECK-NEXT: %13:fpr32 = nofpexcept FRINTXSr %12 - ; CHECK-NEXT: %14:fpr16 = nofpexcept FCVTHSr %13 - ; CHECK-NEXT: %15:fpr32 = nofpexcept FCVTSHr [[DUPi16_1]] - ; CHECK-NEXT: %16:fpr32 = nofpexcept FRINTXSr %15 - ; CHECK-NEXT: %17:fpr16 = nofpexcept FCVTHSr %16 - ; CHECK-NEXT: %18:fpr32 = nofpexcept FCVTSHr [[DUPi16_2]] - ; CHECK-NEXT: %19:fpr32 = nofpexcept FRINTXSr %18 - ; CHECK-NEXT: %20:fpr16 = nofpexcept FCVTHSr %19 - ; CHECK-NEXT: %21:fpr32 = nofpexcept FCVTSHr [[DUPi16_3]] - ; CHECK-NEXT: %22:fpr32 = nofpexcept FRINTXSr %21 - ; CHECK-NEXT: %23:fpr16 = nofpexcept FCVTHSr %22 - ; CHECK-NEXT: %24:fpr32 = nofpexcept FCVTSHr [[DUPi16_4]] - ; CHECK-NEXT: %25:fpr32 = nofpexcept FRINTXSr %24 - ; CHECK-NEXT: %26:fpr16 = nofpexcept FCVTHSr %25 - ; CHECK-NEXT: %27:fpr32 = nofpexcept FCVTSHr [[DUPi16_5]] - ; CHECK-NEXT: %28:fpr32 = nofpexcept FRINTXSr %27 - ; CHECK-NEXT: %29:fpr16 = nofpexcept FCVTHSr %28 - ; CHECK-NEXT: %30:fpr32 = nofpexcept FCVTSHr [[DUPi16_6]] - ; CHECK-NEXT: %31:fpr32 = nofpexcept FRINTXSr %30 - ; CHECK-NEXT: %32:fpr16 = nofpexcept FCVTHSr %31 + ; CHECK-NEXT: %9:fpr32 = nofpexcept FCVTSHr [[COPY1]], implicit $fpcr + ; CHECK-NEXT: %10:fpr32 = nofpexcept FRINTXSr %9, implicit $fpcr + ; CHECK-NEXT: %11:fpr16 = nofpexcept FCVTHSr %10, implicit $fpcr + ; CHECK-NEXT: %12:fpr32 = nofpexcept FCVTSHr [[DUPi16_]], implicit $fpcr + ; CHECK-NEXT: %13:fpr32 = nofpexcept FRINTXSr %12, implicit $fpcr + ; CHECK-NEXT: %14:fpr16 = nofpexcept FCVTHSr %13, implicit $fpcr + ; CHECK-NEXT: %15:fpr32 = nofpexcept FCVTSHr [[DUPi16_1]], implicit $fpcr + ; CHECK-NEXT: %16:fpr32 = nofpexcept FRINTXSr %15, implicit $fpcr + ; CHECK-NEXT: %17:fpr16 = nofpexcept FCVTHSr %16, implicit $fpcr + ; CHECK-NEXT: %18:fpr32 = nofpexcept FCVTSHr [[DUPi16_2]], implicit $fpcr + ; CHECK-NEXT: %19:fpr32 = nofpexcept FRINTXSr %18, implicit $fpcr + ; CHECK-NEXT: %20:fpr16 = nofpexcept FCVTHSr %19, implicit $fpcr + ; CHECK-NEXT: %21:fpr32 = nofpexcept FCVTSHr [[DUPi16_3]], implicit $fpcr + ; CHECK-NEXT: %22:fpr32 = nofpexcept FRINTXSr %21, implicit $fpcr + ; CHECK-NEXT: %23:fpr16 = nofpexcept FCVTHSr %22, implicit $fpcr + ; CHECK-NEXT: %24:fpr32 = nofpexcept FCVTSHr [[DUPi16_4]], implicit $fpcr + ; CHECK-NEXT: %25:fpr32 = nofpexcept FRINTXSr %24, implicit $fpcr + ; CHECK-NEXT: %26:fpr16 = nofpexcept FCVTHSr %25, implicit $fpcr + ; CHECK-NEXT: %27:fpr32 = nofpexcept FCVTSHr [[DUPi16_5]], implicit $fpcr + ; CHECK-NEXT: %28:fpr32 = nofpexcept FRINTXSr %27, implicit $fpcr + ; CHECK-NEXT: %29:fpr16 = nofpexcept FCVTHSr %28, implicit $fpcr + ; CHECK-NEXT: %30:fpr32 = nofpexcept FCVTSHr [[DUPi16_6]], implicit $fpcr + ; CHECK-NEXT: %31:fpr32 = nofpexcept FRINTXSr %30, implicit $fpcr + ; CHECK-NEXT: %32:fpr16 = nofpexcept FCVTHSr %31, implicit $fpcr ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %11, %subreg.hsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir --- a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -16,19 +16,19 @@ %2:fpr64 = COPY $d2 %1:fpr64 = COPY $d1 %0:fpr64 = COPY $d0 - %3:fpr64 = FMULv2f32 %0, %1 - %4:fpr64 = FSUBv2f32 killed %3, %2 + %3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr + %4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr $d0 = COPY %4 RET_ReallyLR implicit $d0 ... # UNPROFITABLE-LABEL: name: f1_2s # UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 -# UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1 +# UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr # # PROFITABLE-LABEL: name: f1_2s # PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 -# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1 +# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_4s registers: @@ -42,19 +42,19 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv4f32 %0, %1 - %4:fpr128 = FSUBv4f32 killed %3, %2 + %3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr + %4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 ... # UNPROFITABLE-LABEL: name: f1_4s -# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1 -# UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2 +# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr +# UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr # # PROFITABLE-LABEL: name: f1_4s # PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2 -# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1 +# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_2d registers: @@ -68,19 +68,19 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv2f64 %0, %1 - %4:fpr128 = FSUBv2f64 killed %3, %2 + %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr + %4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 ... # UNPROFITABLE-LABEL: name: f1_2d -# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1 -# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2 +# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr +# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr # # PROFITABLE-LABEL: name: f1_2d # PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2 -# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1 +# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_both_fmul_2s registers: @@ -97,16 +97,16 @@ %2:fpr64 = COPY $q2 %1:fpr64 = COPY $q1 %0:fpr64 = COPY $q0 - %4:fpr64 = FMULv2f32 %0, %1 - %5:fpr64 = FMULv2f32 %2, %3 - %6:fpr64 = FSUBv2f32 killed %4, %5 + %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr + %5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr + %6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_2s -# ALL: %4:fpr64 = FMULv2f32 %0, %1 -# ALL-NEXT: FMLSv2f32 killed %4, %2, %3 +# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr +# ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr --- name: f1_both_fmul_4s registers: @@ -123,16 +123,16 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %4:fpr128 = FMULv4f32 %0, %1 - %5:fpr128 = FMULv4f32 %2, %3 - %6:fpr128 = FSUBv4f32 killed %4, %5 + %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr + %5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr + %6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_4s -# ALL: %4:fpr128 = FMULv4f32 %0, %1 -# ALL-NEXT: FMLSv4f32 killed %4, %2, %3 +# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr +# ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr --- name: f1_both_fmul_2d registers: @@ -149,14 +149,14 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %4:fpr128 = FMULv2f64 %0, %1 - %5:fpr128 = FMULv2f64 %2, %3 - %6:fpr128 = FSUBv2f64 killed %4, %5 + %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr + %5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr + %6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_2d -# ALL: %4:fpr128 = FMULv2f64 %0, %1 -# ALL-NEXT: FMLSv2f64 killed %4, %2, %3 +# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr +# ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr diff --git a/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir --- a/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir +++ b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir @@ -149,14 +149,14 @@ %9:gpr64 = PHI %7, %bb.2, %26, %bb.3, %27, %bb.4 %21:fpr64 = COPY %9 %25:fpr64 = FMOVD0 - %10:fpr64 = FADDDrr %25, %21 + %10:fpr64 = FADDDrr %25, %21, implicit $fpcr %22:fpr64 = COPY %4 - %11:fpr64 = FMULDrr %22, %10 + %11:fpr64 = FMULDrr %22, %10, implicit $fpcr %23:fpr64 = COPY %4 - %13:fpr64 = FABD64 %23, %11 + %13:fpr64 = FABD64 %23, %11, implicit $fpcr %28:gpr64 = MOVi64imm 4307583784117748259 %24:fpr64 = COPY %28 - FCMPDrr %13, %24, implicit-def $nzcv + FCMPDrr %13, %24, implicit-def $nzcv, implicit $fpcr %17:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv TBNZW %17, 0, %bb.6 B %bb.7 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-fmul-dup.mir @@ -143,8 +143,8 @@ ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0 - ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]] + ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]], implicit $fpcr ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64) into %ir.ret, align 16) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -161,8 +161,8 @@ %0:fpr64 = DUPv2i32lane killed %7, 0 bb.1.for.cond: - %9:fpr64 = FMULv2f32 %5, %0 - %10:fpr64 = FADDv2f32 killed %9, %6 + %9:fpr64 = FMULv2f32 %5, %0, implicit $fpcr + %10:fpr64 = FADDv2f32 killed %9, %6, implicit $fpcr STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16) B %bb.1 @@ -208,8 +208,8 @@ ; CHECK-NEXT: [[DUPv2i32lane:%[0-9]+]]:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0 - ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]] + ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]], implicit $fpcr ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64) into %ir.ret, align 16) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -226,8 +226,8 @@ %0:fpr64 = DUPv2i32lane killed %7, 0 bb.1.for.cond: - %9:fpr64 = FMULv2f32 %0, %5 - %10:fpr64 = FADDv2f32 killed %9, %6 + %9:fpr64 = FMULv2f32 %0, %5, implicit $fpcr + %10:fpr64 = FADDv2f32 killed %9, %6, implicit $fpcr STRDui killed %10, %4, 0 :: (store 8 into %ir.ret, align 16) B %bb.1 @@ -269,8 +269,8 @@ ; CHECK-NEXT: [[DUPv2i64lane:%[0-9]+]]:fpr128 = DUPv2i64lane [[COPY3]], 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv2i64_indexed:%[0-9]+]]:fpr128 = FMULv2i64_indexed [[COPY5]], [[COPY3]], 0 - ; CHECK-NEXT: [[FADDv2f64_:%[0-9]+]]:fpr128 = FADDv2f64 killed [[FMULv2i64_indexed]], [[COPY4]] + ; CHECK-NEXT: [[FMULv2i64_indexed:%[0-9]+]]:fpr128 = FMULv2i64_indexed [[COPY5]], [[COPY3]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv2f64_:%[0-9]+]]:fpr128 = FADDv2f64 killed [[FMULv2i64_indexed]], [[COPY4]], implicit $fpcr ; CHECK-NEXT: STRQui killed [[FADDv2f64_]], [[COPY]], 0 :: (store (s128) into %ir.ret) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -285,8 +285,8 @@ %0:fpr128 = DUPv2i64lane %1, 0 bb.1.for.cond: - %7:fpr128 = FMULv2f64 %5, %0 - %8:fpr128 = FADDv2f64 killed %7, %6 + %7:fpr128 = FMULv2f64 %5, %0, implicit $fpcr + %8:fpr128 = FADDv2f64 killed %7, %6, implicit $fpcr STRQui killed %8, %4, 0 :: (store 16 into %ir.ret) B %bb.1 @@ -328,8 +328,8 @@ ; CHECK-NEXT: [[DUPv4i32lane:%[0-9]+]]:fpr128 = DUPv4i32lane [[COPY3]], 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv4i32_indexed:%[0-9]+]]:fpr128 = FMULv4i32_indexed [[COPY5]], [[COPY3]], 0 - ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = FADDv4f32 killed [[FMULv4i32_indexed]], [[COPY4]] + ; CHECK-NEXT: [[FMULv4i32_indexed:%[0-9]+]]:fpr128 = FMULv4i32_indexed [[COPY5]], [[COPY3]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = FADDv4f32 killed [[FMULv4i32_indexed]], [[COPY4]], implicit $fpcr ; CHECK-NEXT: STRQui killed [[FADDv4f32_]], [[COPY]], 0 :: (store (s128) into %ir.ret) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -344,8 +344,8 @@ %0:fpr128 = DUPv4i32lane %1, 0 bb.1.for.cond: - %7:fpr128 = FMULv4f32 %5, %0 - %8:fpr128 = FADDv4f32 killed %7, %6 + %7:fpr128 = FMULv4f32 %5, %0, implicit $fpcr + %8:fpr128 = FADDv4f32 killed %7, %6, implicit $fpcr STRQui killed %8, %4, 0 :: (store 16 into %ir.ret) B %bb.1 @@ -387,8 +387,8 @@ ; CHECK-NEXT: [[DUPv4i16lane:%[0-9]+]]:fpr64 = DUPv4i16lane [[INSERT_SUBREG]], 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv4i16_indexed:%[0-9]+]]:fpr64 = FMULv4i16_indexed [[COPY2]], [[INSERT_SUBREG]], 0 - ; CHECK-NEXT: [[FADDv4f16_:%[0-9]+]]:fpr64 = FADDv4f16 killed [[FMULv4i16_indexed]], [[COPY1]] + ; CHECK-NEXT: [[FMULv4i16_indexed:%[0-9]+]]:fpr64 = FMULv4i16_indexed [[COPY2]], [[INSERT_SUBREG]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv4f16_:%[0-9]+]]:fpr64 = FADDv4f16 killed [[FMULv4i16_indexed]], [[COPY1]], implicit $fpcr ; CHECK-NEXT: STRDui killed [[FADDv4f16_]], [[COPY]], 0 :: (store (s64) into %ir.ret, align 16) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -403,8 +403,8 @@ %0:fpr64 = DUPv4i16lane killed %5, 0 bb.1.for.cond: - %7:fpr64 = FMULv4f16 %2, %0 - %8:fpr64 = FADDv4f16 killed %7, %3 + %7:fpr64 = FMULv4f16 %2, %0, implicit $fpcr + %8:fpr64 = FADDv4f16 killed %7, %3, implicit $fpcr STRDui killed %8, %4, 0 :: (store 8 into %ir.ret, align 16) B %bb.1 @@ -442,8 +442,8 @@ ; CHECK-NEXT: [[DUPv8i16lane:%[0-9]+]]:fpr128 = DUPv8i16lane [[COPY3]], 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv8i16_indexed:%[0-9]+]]:fpr128 = FMULv8i16_indexed [[COPY2]], [[COPY3]], 0 - ; CHECK-NEXT: [[FADDv8f16_:%[0-9]+]]:fpr128 = FADDv8f16 killed [[FMULv8i16_indexed]], [[COPY1]] + ; CHECK-NEXT: [[FMULv8i16_indexed:%[0-9]+]]:fpr128 = FMULv8i16_indexed [[COPY2]], [[COPY3]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv8f16_:%[0-9]+]]:fpr128 = FADDv8f16 killed [[FMULv8i16_indexed]], [[COPY1]], implicit $fpcr ; CHECK-NEXT: STRQui killed [[FADDv8f16_]], [[COPY]], 0 :: (store (s128) into %ir.ret) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -456,8 +456,8 @@ %0:fpr128 = DUPv8i16lane %1, 0 bb.1.for.cond: - %5:fpr128 = FMULv8f16 %2, %0 - %6:fpr128 = FADDv8f16 killed %5, %3 + %5:fpr128 = FMULv8f16 %2, %0, implicit $fpcr + %6:fpr128 = FADDv8f16 killed %5, %3, implicit $fpcr STRQui killed %6, %4, 0 :: (store 16 into %ir.ret) B %bb.1 @@ -518,8 +518,8 @@ ; CHECK-NEXT: STRDui killed [[COPY8]], [[COPY1]], 0 :: (store (s64) into %ir.ret2) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.for.cond: - ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY7]], [[INSERT_SUBREG]], 0 - ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY6]] + ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY7]], [[INSERT_SUBREG]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY6]], implicit $fpcr ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY2]], 0 :: (store (s64) into %ir.ret, align 16) ; CHECK-NEXT: B %bb.1 bb.0.entry: @@ -543,8 +543,8 @@ STRDui killed %14, %5, 0 :: (store (s64) into %ir.ret2) bb.1.for.cond: - %15:fpr64 = FMULv2f32 %7, %0 - %16:fpr64 = FADDv2f32 killed %15, %8 + %15:fpr64 = FMULv2f32 %7, %0, implicit $fpcr + %16:fpr64 = FADDv2f32 killed %15, %8, implicit $fpcr STRDui killed %16, %4, 0 :: (store (s64) into %ir.ret, align 16) B %bb.1 @@ -592,8 +592,8 @@ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr64 = COPY [[DUPv2i32lane]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0 - ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]] + ; CHECK-NEXT: [[FMULv2i32_indexed:%[0-9]+]]:fpr64 = FMULv2i32_indexed [[COPY5]], [[INSERT_SUBREG]], 0, implicit $fpcr + ; CHECK-NEXT: [[FADDv2f32_:%[0-9]+]]:fpr64 = FADDv2f32 killed [[FMULv2i32_indexed]], [[COPY4]], implicit $fpcr ; CHECK-NEXT: STRDui killed [[FADDv2f32_]], [[COPY]], 0 :: (store (s64), align 16) ; CHECK-NEXT: B %bb.1 bb.0: @@ -611,8 +611,8 @@ %0:fpr64 = COPY %11 bb.1: - %9:fpr64 = FMULv2f32 %5, %0 - %10:fpr64 = FADDv2f32 killed %9, %6 + %9:fpr64 = FMULv2f32 %5, %0, implicit $fpcr + %10:fpr64 = FADDv2f32 killed %9, %6, implicit $fpcr STRDui killed %10, %4, 0 :: (store 8, align 16) B %bb.1 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir @@ -6,7 +6,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: :fpr32 = nnan ninf nsz arcp contract afn reassoc FMADDSrrr [[B]], [[A]], [[C]] +# CHECK-NEXT: :fpr32 = nnan ninf nsz arcp contract afn reassoc FMADDSrrr [[B]], [[A]], [[C]], implicit $fpcr --- name: scalar_fmadd_fast alignment: 4 @@ -32,8 +32,8 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %3:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr %1, %0 - %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed %3, %2 + %3:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr %1, %0, implicit $fpcr + %4:fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed %3, %2, implicit $fpcr $s0 = COPY %4 RET_ReallyLR implicit $s0 @@ -45,7 +45,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]] +# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]], implicit $fpcr --- name: scalar_fmadd_contract @@ -72,8 +72,8 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %3:fpr32 = contract FMULSrr %1, %0 - %4:fpr32 = contract FADDSrr killed %3, %2 + %3:fpr32 = contract FMULSrr %1, %0, implicit $fpcr + %4:fpr32 = contract FADDSrr killed %3, %2, implicit $fpcr $s0 = COPY %4 RET_ReallyLR implicit $s0 @@ -85,8 +85,8 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]], implicit $fpcr +# CHECK-NEXT: fpr32 = FADDSrr killed [[MUL]], [[C]], implicit $fpcr --- name: scalar_fmadd_contract_op0 alignment: 4 @@ -112,8 +112,8 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %3:fpr32 = contract FMULSrr %1, %0 - %4:fpr32 = FADDSrr killed %3, %2 + %3:fpr32 = contract FMULSrr %1, %0, implicit $fpcr + %4:fpr32 = FADDSrr killed %3, %2, implicit $fpcr $s0 = COPY %4 RET_ReallyLR implicit $s0 @@ -125,7 +125,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]] +# CHECK-NEXT: :fpr32 = contract FMADDSrrr [[B]], [[A]], [[C]], implicit $fpcr --- name: scalar_fmadd_contract_op1 @@ -152,8 +152,8 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %3:fpr32 = FMULSrr %1, %0 - %4:fpr32 = contract FADDSrr killed %3, %2 + %3:fpr32 = FMULSrr %1, %0, implicit $fpcr + %4:fpr32 = contract FADDSrr killed %3, %2, implicit $fpcr $s0 = COPY %4 RET_ReallyLR implicit $s0 @@ -165,8 +165,8 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = nsz FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = nsz FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: [[MUL:%.*]]:fpr32 = nsz FMULSrr [[B]], [[A]], implicit $fpcr +# CHECK-NEXT: fpr32 = nsz FADDSrr killed [[MUL]], [[C]], implicit $fpcr --- name: scalar_fmadd_nsz @@ -193,8 +193,8 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %3:fpr32 = nsz FMULSrr %1, %0 - %4:fpr32 = nsz FADDSrr killed %3, %2 + %3:fpr32 = nsz FMULSrr %1, %0, implicit $fpcr + %4:fpr32 = nsz FADDSrr killed %3, %2, implicit $fpcr $s0 = COPY %4 RET_ReallyLR implicit $s0 @@ -206,7 +206,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FMLAv2f64 [[C]], [[B]], [[A]] +# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FMLAv2f64 [[C]], [[B]], [[A]], implicit $fpcr --- name: vector_fmadd_fast alignment: 4 @@ -232,8 +232,8 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 %1, %0 - %4:fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed %3, %2 + %3:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 %1, %0, implicit $fpcr + %4:fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 @@ -245,7 +245,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]] +# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]], implicit $fpcr --- name: vector_fmadd_contract alignment: 4 @@ -271,8 +271,8 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = contract FMULv2f64 %1, %0 - %4:fpr128 = contract FADDv2f64 killed %3, %2 + %3:fpr128 = contract FMULv2f64 %1, %0, implicit $fpcr + %4:fpr128 = contract FADDv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 @@ -284,8 +284,8 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]], implicit $fpcr +# CHECK-NEXT: fpr128 = FADDv2f64 killed [[MUL]], [[C]], implicit $fpcr --- name: vector_fmadd_contract_op0 alignment: 4 @@ -311,8 +311,8 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = contract FMULv2f64 %1, %0 - %4:fpr128 = FADDv2f64 killed %3, %2 + %3:fpr128 = contract FMULv2f64 %1, %0, implicit $fpcr + %4:fpr128 = FADDv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 @@ -324,7 +324,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]] +# CHECK-NEXT: fpr128 = contract FMLAv2f64 [[C]], [[B]], [[A]], implicit $fpcr --- name: vector_fmadd_contract_op1 @@ -351,8 +351,8 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv2f64 %1, %0 - %4:fpr128 = contract FADDv2f64 killed %3, %2 + %3:fpr128 = FMULv2f64 %1, %0, implicit $fpcr + %4:fpr128 = contract FADDv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 @@ -364,8 +364,8 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = nsz FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = nsz FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: [[MUL:%.*]]:fpr128 = nsz FMULv2f64 [[B]], [[A]], implicit $fpcr +# CHECK-NEXT: fpr128 = nsz FADDv2f64 killed [[MUL]], [[C]], implicit $fpcr --- name: vector_fmadd_nsz alignment: 4 @@ -391,8 +391,8 @@ %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = nsz FMULv2f64 %1, %0 - %4:fpr128 = nsz FADDv2f64 killed %3, %2 + %3:fpr128 = nsz FMULv2f64 %1, %0, implicit $fpcr + %4:fpr128 = nsz FADDv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir @@ -4,11 +4,11 @@ # fadd without the reassoc flags can be reassociate only when unsafe fp math is # enabled. # CHECK-LABEL: name: fadd_no_reassoc -# CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1 -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2 -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3 -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3 -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]] +# CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1, implicit $fpcr +# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr +# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3, implicit $fpcr +# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr --- name: fadd_no_reassoc alignment: 4 @@ -38,9 +38,9 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %4:fpr32 = FADDSrr %0, %1 - %5:fpr32 = FADDSrr killed %4, %2 - %6:fpr32 = FADDSrr killed %5, %3 + %4:fpr32 = FADDSrr %0, %1, implicit $fpcr + %5:fpr32 = FADDSrr killed %4, %2, implicit $fpcr + %6:fpr32 = FADDSrr killed %5, %3, implicit $fpcr $s0 = COPY %6 RET_ReallyLR implicit $s0 @@ -48,11 +48,11 @@ # FIXME: We should be able to reassociate without unsafe fp math, but currently # the reassoc flag is ignored. # CHECK-LABEL: name: fadd_reassoc -# CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1 -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2 -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3 -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3 -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]] +# CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr +# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr +# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3, implicit $fpcr +# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr --- name: fadd_reassoc alignment: 4 @@ -82,20 +82,20 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %4:fpr32 = reassoc FADDSrr %0, %1 - %5:fpr32 = reassoc FADDSrr killed %4, %2 - %6:fpr32 = reassoc FADDSrr killed %5, %3 + %4:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr + %5:fpr32 = reassoc FADDSrr killed %4, %2, implicit $fpcr + %6:fpr32 = reassoc FADDSrr killed %5, %3, implicit $fpcr $s0 = COPY %6 RET_ReallyLR implicit $s0 # Check that flags on the instructions are preserved after reassociation. # CHECK-LABEL: name: fadd_flags -# CHECK: [[ADD1:%[0-9]+]]:fpr32 = nsz FADDSrr %0, %1 -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan FADDSrr killed [[ADD1]], %2 -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf FADDSrr killed [[ADD2]], %3 -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan FADDSrr %2, %3 -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf FADDSrr killed [[ADD1]], killed [[ADD2]] +# CHECK: [[ADD1:%[0-9]+]]:fpr32 = nsz FADDSrr %0, %1, implicit $fpcr +# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf FADDSrr killed [[ADD2]], %3, implicit $fpcr +# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan FADDSrr %2, %3, implicit $fpcr +# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr --- name: fadd_flags alignment: 4 @@ -125,8 +125,8 @@ %2:fpr32 = COPY $s2 %1:fpr32 = COPY $s1 %0:fpr32 = COPY $s0 - %4:fpr32 = nsz FADDSrr %0, %1 - %5:fpr32 = nnan FADDSrr killed %4, %2 - %6:fpr32 = ninf FADDSrr killed %5, %3 + %4:fpr32 = nsz FADDSrr %0, %1, implicit $fpcr + %5:fpr32 = nnan FADDSrr killed %4, %2, implicit $fpcr + %6:fpr32 = ninf FADDSrr killed %5, %3, implicit $fpcr $s0 = COPY %6 RET_ReallyLR implicit $s0 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.mir b/llvm/test/CodeGen/AArch64/machine-combiner.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner.mir @@ -34,10 +34,10 @@ ; CHECK: MADDWrrr %1, %2, %3 %5 = MADDWrrr %1, %2, $wzr %6 = ADDWrr %3, killed %5 - %7 = SCVTFUWDri killed %6 - ; CHECK: FMADDDrrr %7, %7, %0 - %8 = FMULDrr %7, %7 - %9 = FADDDrr %0, killed %8 + %7 = SCVTFUWDri killed %6, implicit $fpcr + ; CHECK: FMADDDrrr %7, %7, %0, implicit $fpcr + %8 = FMULDrr %7, %7, implicit $fpcr + %9 = FADDDrr %0, killed %8, implicit $fpcr $d0 = COPY %9 RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir b/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir --- a/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir +++ b/llvm/test/CodeGen/AArch64/misched-predicate-virtreg.mir @@ -12,7 +12,7 @@ # CHECK-NEXT: Successors: # CHECK-NEXT: SU(1): Data Latency=2 Reg=%0 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(1): %1:fpr32 = FMINVv4i32v %0:fpr128 +# CHECK-NEXT: SU(1): %1:fpr32 = FMINVv4i32v %0:fpr128, implicit $fpcr # CHECK-NEXT: # preds left : 1 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 @@ -31,7 +31,7 @@ bb.0: liveins: $s0, $q1 %0:fpr128 = COPY $q1 - %1:fpr32 = FMINVv4i32v %0:fpr128 + %1:fpr32 = FMINVv4i32v %0:fpr128, implicit $fpcr $s0 = COPY %1 RET_ReallyLR implicit $s0 diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -181,7 +181,7 @@ # CHECK: renamable $q9 = LDRQui $x0, 0 :: (load (s128)) # CHECK-NEXT: renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3 # CHECK-NEXT: STRQui renamable $q9, renamable $x0, 11 :: (store (s128), align 4) -# CHECK-NEXT: renamable $q9 = FADDv2f64 renamable $q9, renamable $q9 +# CHECK-NEXT: renamable $q9 = FADDv2f64 renamable $q9, renamable $q9, implicit $fpcr # CHECK-NEXT: STRQui renamable $q9, renamable $x0, 10 :: (store (s128), align 4) # CHECK-NEXT: RET undef $lr @@ -204,7 +204,7 @@ renamable $q9 = LDRQui $x0, 0 :: (load (s128)) renamable $q9 = XTNv8i16 renamable $q9, killed renamable $q3 STRQui renamable $q9, renamable $x0, 11 :: (store (s128), align 4) - renamable $q9 = FADDv2f64 renamable $q9, renamable $q9 + renamable $q9 = FADDv2f64 renamable $q9, renamable $q9, implicit $fpcr STRQui renamable $q9, renamable $x0, 10 :: (store (s128), align 4) RET undef $lr @@ -325,7 +325,7 @@ # CHECK-LABEL: bb.0: # CHECK: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 -# CHECK: renamable $q7 = FADDv2f64 renamable $q7, renamable $q7 +# CHECK: renamable $q7 = FADDv2f64 renamable $q7, renamable $q7, implicit $fpcr # CHECK-NEXT: STRQui killed renamable $q7, renamable $x0, 100 :: (store (s128), align 4) # CHECK-NEXT: $q7 = LDRQui $x0, 0 :: (load (s128)) # CHECK-NEXT: renamable $q9 = LDRQui $x0, 1 :: (load (s128)) @@ -347,7 +347,7 @@ body: | bb.0: liveins: $x0, $x1, $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7 - renamable $q7 = FADDv2f64 renamable $q7, renamable $q7 + renamable $q7 = FADDv2f64 renamable $q7, renamable $q7, implicit $fpcr STRQui renamable killed $q7, renamable $x0, 100 :: (store (s128), align 4) renamable $q9 = LDRQui $x0, 0 :: (load (s128)) STRQui renamable killed $q9, renamable $x0, 11 :: (store (s128), align 4) diff --git a/llvm/test/CodeGen/AArch64/strict-fp-opt.ll b/llvm/test/CodeGen/AArch64/strict-fp-opt.ll --- a/llvm/test/CodeGen/AArch64/strict-fp-opt.ll +++ b/llvm/test/CodeGen/AArch64/strict-fp-opt.ll @@ -22,6 +22,15 @@ ret void } +; CHECK-LABEL: unused_div_round_dynamic: +; CHECK-NOT: fdiv +; CHECK: ret +define void @unused_div_round_dynamic(float %x, float %y) #0 { +entry: + %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + ret void +} + ; Machine CSE should eliminate the second add unless we have strict exceptions @@ -71,8 +80,101 @@ ret float %a.0 } +; CHECK-LABEL: add_twice_round_dynamic: +; CHECK: fadd s0, s0, s1 +; CHECK-NEXT: cbz w0, [[LABEL:.LBB[0-9_]+]] +; CHECK-NOT: fadd +; CHECK: fmul s0, s0, s0 +; CHECK: [[LABEL]]: +; CHECK-NEXT: ret +define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 { +entry: + %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + + +; Two adds separated by llvm.set.rounding should be preserved when rounding is +; dynamic (as they may give different results) or when we have strict exceptions +; (the llvm.set.rounding is irrelevant, but both could trap). + +; CHECK-LABEL: set_rounding: +; CHECK-DAG: fadd [[SREG:s[0-9]+]], s0, s1 +; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR +; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000 +; CHECK: msr FPCR, [[XREG2]] +; CHECK-NEXT: mrs [[XREG3:x[0-9]+]], FPCR +; CHECK-NEXT: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff +; CHECK-NEXT: msr FPCR, [[XREG4]] +; CHECK-NEXT: fsub s0, [[SREG]], [[SREG]] +; CHECK-NEXT: ret +define float @set_rounding(float %x, float %y) #0 { +entry: + %add1 = fadd float %x, %y + call void @llvm.set.rounding(i32 0) + %add2 = fadd float %x, %y + call void @llvm.set.rounding(i32 1) + %sub = fsub float %add1, %add2 + ret float %sub +} + +; CHECK-LABEL: set_rounding_fpexcept_strict: +; CHECK-DAG: fadd [[SREG1:s[0-9]+]], s0, s1 +; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR +; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000 +; CHECK: msr FPCR, [[XREG2]] +; CHECK-DAG: fadd [[SREG2:s[0-9]+]], s0, s1 +; CHECK-DAG: mrs [[XREG3:x[0-9]+]], FPCR +; CHECK-DAG: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff +; CHECK-NEXT: msr FPCR, [[XREG4]] +; CHECK-NEXT: fsub s0, [[SREG1]], [[SREG2]] +; CHECK-NEXT: ret +define float @set_rounding_fpexcept_strict(float %x, float %y) #0 { +entry: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + call void @llvm.set.rounding(i32 0) + %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + call void @llvm.set.rounding(i32 1) + %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %sub +} + +; CHECK-LABEL: set_rounding_round_dynamic: +; CHECK-DAG: fadd [[SREG1:s[0-9]+]], s0, s1 +; CHECK-DAG: mrs [[XREG1:x[0-9]+]], FPCR +; CHECK-DAG: orr [[XREG2:x[0-9]+]], [[XREG1]], #0xc00000 +; CHECK: msr FPCR, [[XREG2]] +; CHECK-DAG: fadd [[SREG2:s[0-9]+]], s0, s1 +; CHECK-DAG: mrs [[XREG3:x[0-9]+]], FPCR +; CHECK-DAG: and [[XREG4:x[0-9]+]], [[XREG3]], #0xffffffffff3fffff +; CHECK-NEXT: msr FPCR, [[XREG4]] +; CHECK-NEXT: fsub s0, [[SREG1]], [[SREG2]] +; CHECK-NEXT: ret +define float @set_rounding_round_dynamic(float %x, float %y) #0 { +entry: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @llvm.set.rounding(i32 0) + %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @llvm.set.rounding(i32 1) + %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + ret float %sub +} + declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #0 +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #0 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #0 declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #0 +declare i32 @llvm.flt.rounds() +declare void @llvm.set.rounding(i32) attributes #0 = { "strictfp" } diff --git a/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir b/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir --- a/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir +++ b/llvm/test/CodeGen/AArch64/taildup-inst-dup-loc.mir @@ -30,7 +30,7 @@ ; CHECK: B %bb.4 ; CHECK: bb.3: ; CHECK: successors: %bb.9(0x80000000) - ; CHECK: [[SCVTFUXDri:%[0-9]+]]:fpr64 = SCVTFUXDri [[DEF2]] + ; CHECK: [[SCVTFUXDri:%[0-9]+]]:fpr64 = SCVTFUXDri [[DEF2]], implicit $fpcr ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SCVTFUXDri]] ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[SCVTFUXDri]] ; CHECK: B %bb.9 @@ -85,7 +85,7 @@ bb.4: successors: %bb.7(0x80000000) - %6:fpr64 = SCVTFUXDri %5 + %6:fpr64 = SCVTFUXDri %5, implicit $fpcr B %bb.7 bb.5: diff --git a/llvm/test/CodeGen/AArch64/wineh-frame1.mir b/llvm/test/CodeGen/AArch64/wineh-frame1.mir --- a/llvm/test/CodeGen/AArch64/wineh-frame1.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame1.mir @@ -74,10 +74,10 @@ liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 $x19 = ADDXrr $x0, killed $x1 - $d8 = FADDDrr killed $d0, $d1 - $d9 = FADDDrr $d8, $d1 - $d10 = FADDDrr $d9, $d8 - $d11 = FADDDrr killed $d9, $d10 + $d8 = FADDDrr killed $d0, $d1, implicit $fpcr + $d9 = FADDDrr $d8, $d1, implicit $fpcr + $d10 = FADDDrr $d9, $d8, implicit $fpcr + $d11 = FADDDrr killed $d9, $d10, implicit $fpcr $x20 = ADDXrr $x19, killed $x0 $x21 = ADDXrr $x20, killed $x19 $x22 = ADDXrr $x21, killed $x20 diff --git a/llvm/test/CodeGen/AArch64/wineh-frame2.mir b/llvm/test/CodeGen/AArch64/wineh-frame2.mir --- a/llvm/test/CodeGen/AArch64/wineh-frame2.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame2.mir @@ -61,11 +61,11 @@ bb.0.entry: liveins: $x0, $x1, $d0, $d1, $d10, $d11, $d8, $d9 $x19 = ADDXrr $x0, killed $x1 - $d8 = FADDDrr killed $d0, $d1 - $d9 = FADDDrr $d8, $d1 - $d10 = FADDDrr $d9, $d8 - $d11 = FADDDrr killed $d9, $d10 - $d12 = FADDDrr $d11, killed $d11 + $d8 = FADDDrr killed $d0, $d1, implicit $fpcr + $d9 = FADDDrr $d8, $d1, implicit $fpcr + $d10 = FADDDrr $d9, $d8, implicit $fpcr + $d11 = FADDDrr killed $d9, $d10, implicit $fpcr + $d12 = FADDDrr $d11, killed $d11, implicit $fpcr $x0 = COPY $d12 RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/wineh-frame4.mir b/llvm/test/CodeGen/AArch64/wineh-frame4.mir --- a/llvm/test/CodeGen/AArch64/wineh-frame4.mir +++ b/llvm/test/CodeGen/AArch64/wineh-frame4.mir @@ -52,8 +52,8 @@ body: | bb.0.entry: liveins: $d0, $d1 - $d8 = FADDDrr $d0, killed $d1 - $d10 = FADDDrr killed $d8, $d0 + $d8 = FADDDrr $d0, killed $d1, implicit $fpcr + $d10 = FADDDrr killed $d8, $d0, implicit $fpcr $x0 = COPY killed $d10 RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/wineh2.mir b/llvm/test/CodeGen/AArch64/wineh2.mir --- a/llvm/test/CodeGen/AArch64/wineh2.mir +++ b/llvm/test/CodeGen/AArch64/wineh2.mir @@ -138,11 +138,11 @@ frame-setup SEH_SaveRegP 19, 20, 112 frame-setup SEH_PrologEnd $x19 = ADDXrr $x0, killed $x1 - $d8 = FADDDrr killed $d0, $d1 - $d9 = FADDDrr $d8, $d1 - $d10 = FADDDrr $d9, $d8 - $d11 = FADDDrr killed $d9, $d10 - $d12 = FADDDrr killed $d10, killed $d11 + $d8 = FADDDrr killed $d0, $d1, implicit $fpcr + $d9 = FADDDrr $d8, $d1, implicit $fpcr + $d10 = FADDDrr $d9, $d8, implicit $fpcr + $d11 = FADDDrr killed $d9, $d10, implicit $fpcr + $d12 = FADDDrr killed $d10, killed $d11, implicit $fpcr $x20 = ADDXrr $x19, killed $x0 $x21 = ADDXrr $x20, killed $x19 $x22 = ADDXrr $x21, killed $x20 diff --git a/llvm/test/CodeGen/AArch64/wineh3.mir b/llvm/test/CodeGen/AArch64/wineh3.mir --- a/llvm/test/CodeGen/AArch64/wineh3.mir +++ b/llvm/test/CodeGen/AArch64/wineh3.mir @@ -119,10 +119,10 @@ frame-setup SEH_SaveRegP 19, 20, 96 frame-setup SEH_PrologEnd $x19 = ADDXrr $x0, killed $x1 - $d8 = FADDDrr killed $d0, $d1 - $d9 = FADDDrr $d8, $d1 - $d10 = FADDDrr $d9, $d8 - $d11 = FADDDrr killed $d9, $d10 + $d8 = FADDDrr killed $d0, $d1, implicit $fpcr + $d9 = FADDDrr $d8, $d1, implicit $fpcr + $d10 = FADDDrr $d9, $d8, implicit $fpcr + $d11 = FADDDrr killed $d9, $d10, implicit $fpcr $x20 = ADDXrr $x19, killed $x0 $x21 = ADDXrr $x20, killed $x19 $x22 = ADDXrr $x21, killed $x20 diff --git a/llvm/test/CodeGen/AArch64/wineh4.mir b/llvm/test/CodeGen/AArch64/wineh4.mir --- a/llvm/test/CodeGen/AArch64/wineh4.mir +++ b/llvm/test/CodeGen/AArch64/wineh4.mir @@ -164,10 +164,10 @@ frame-setup CFI_INSTRUCTION offset $b10, -104 frame-setup CFI_INSTRUCTION offset $b11, -112 $x19 = ADDXrr $x0, killed $x1 - $d8 = FADDDrr killed $d0, $d1 - $d9 = FADDDrr $d8, $d1 - $d10 = FADDDrr $d9, $d8 - $d11 = FADDDrr killed $d9, $d10 + $d8 = FADDDrr killed $d0, $d1, implicit $fpcr + $d9 = FADDDrr $d8, $d1, implicit $fpcr + $d10 = FADDDrr $d9, $d8, implicit $fpcr + $d11 = FADDDrr killed $d9, $d10, implicit $fpcr $x20 = SUBSXrr $x19, killed $x0, implicit-def $nzcv Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 diff --git a/llvm/test/CodeGen/AArch64/wineh8.mir b/llvm/test/CodeGen/AArch64/wineh8.mir --- a/llvm/test/CodeGen/AArch64/wineh8.mir +++ b/llvm/test/CodeGen/AArch64/wineh8.mir @@ -163,10 +163,10 @@ frame-setup CFI_INSTRUCTION offset $b10, -104 frame-setup CFI_INSTRUCTION offset $b11, -112 $x19 = ADDXrr $x0, killed $x1 - $d8 = FADDDrr killed $d0, $d1 - $d9 = FADDDrr $d8, $d1 - $d10 = FADDDrr $d9, $d8 - $d11 = FADDDrr killed $d9, $d10 + $d8 = FADDDrr killed $d0, $d1, implicit $fpcr + $d9 = FADDDrr $d8, $d1, implicit $fpcr + $d10 = FADDDrr $d9, $d8, implicit $fpcr + $d11 = FADDDrr killed $d9, $d10, implicit $fpcr $x20 = SUBSXrr $x19, killed $x0, implicit-def $nzcv Bcc 1, %bb.2, implicit killed $nzcv B %bb.1 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir @@ -55,16 +55,16 @@ STRDui %3, %stack.4, 0 :: (store (s64)) %4:fpr64 = FMOVDi 20 - %5:fpr64 = FADDDrr %2, killed %4 + %5:fpr64 = FADDDrr %2, killed %4, implicit $fpcr STRDui %5, %stack.5, 0 :: (store (s64)) - %6:gpr32 = FCVTZSUWDr %5 + %6:gpr32 = FCVTZSUWDr %5, implicit $fpcr STRDroW %3, %0, killed %6, 1, 1 %7:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load (s64)) %8:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) - %9:gpr32common = FCVTZSUWDr killed %8 + %9:gpr32common = FCVTZSUWDr killed %8, implicit $fpcr %10:fpr64 = LDRDroW %7, %9, 1, 1 %11:gpr32common = ADDWri %9, 1, 0 @@ -73,7 +73,7 @@ %12:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) %13:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load (s64)) - %14:gpr32common = FCVTZSUWDr %12 + %14:gpr32common = FCVTZSUWDr %12, implicit $fpcr %15:gpr32common = ADDWri killed %14, 30, 0 STRDroW %12, killed %13, killed %15, 1, 1 @@ -85,28 +85,28 @@ %43:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) %44:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load (s64)) - %45:gpr32 = FCVTZSUWDr %43 + %45:gpr32 = FCVTZSUWDr %43, implicit $fpcr %47:gpr64common = SMADDLrrr killed %45, %46, killed %44 %48:fpr64 = LDRDui %stack.6, 0 :: (dereferenceable load (s64)) - %49:gpr32 = FCVTZSUWDr killed %48 + %49:gpr32 = FCVTZSUWDr killed %48, implicit $fpcr STRDroW %43, killed %47, killed %49, 1, 1 %21:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load (s64)) %22:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) - %23:gpr32 = FCVTZSUWDr killed %22 + %23:gpr32 = FCVTZSUWDr killed %22, implicit $fpcr %24:gpr32 = MOVi32imm 408 %25:gpr64common = SMADDLrrr %23, %24, killed %21 %26:gpr64sp = ADDXrx killed %25, %23, 51 %27:fpr64 = LDURDi %26, -8 - %29:fpr64 = FADDDrr killed %27, %19 + %29:fpr64 = FADDDrr killed %27, %19, implicit $fpcr STURDi killed %29, %26, -8 %30:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load (s64)) %31:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load (s64)) - %32:gpr32common = FCVTZSUWDr killed %31 + %32:gpr32common = FCVTZSUWDr killed %31, implicit $fpcr %34:gpr64all = IMPLICIT_DEF %33:gpr64 = INSERT_SUBREG %34, %32, %subreg.sub_32 %35:gpr64 = SBFMXri killed %33, 61, 31 diff --git a/llvm/test/DebugInfo/COFF/AArch64/codeview-b-register.mir b/llvm/test/DebugInfo/COFF/AArch64/codeview-b-register.mir --- a/llvm/test/DebugInfo/COFF/AArch64/codeview-b-register.mir +++ b/llvm/test/DebugInfo/COFF/AArch64/codeview-b-register.mir @@ -64,9 +64,9 @@ bb.0: liveins: $b0 DBG_VALUE $b0, $noreg, !11, !DIExpression(), debug-location !13 - renamable $s0 = nofpexcept FCVTSHr killed renamable $h0, debug-location !14 + renamable $s0 = nofpexcept FCVTSHr killed renamable $h0, implicit $fpcr, debug-location !14 DBG_VALUE $b0, $noreg, !11, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !13 - nofpexcept FCMPSri killed renamable $s0, implicit-def $nzcv, debug-location !14 + nofpexcept FCMPSri killed renamable $s0, implicit-def $nzcv, implicit $fpcr, debug-location !14 renamable $w0 = CSINCWr $wzr, $wzr, 0, implicit killed $nzcv, debug-location !14 RET undef $lr, implicit killed $w0 ... diff --git a/llvm/test/DebugInfo/COFF/AArch64/codeview-h-register.mir b/llvm/test/DebugInfo/COFF/AArch64/codeview-h-register.mir --- a/llvm/test/DebugInfo/COFF/AArch64/codeview-h-register.mir +++ b/llvm/test/DebugInfo/COFF/AArch64/codeview-h-register.mir @@ -62,9 +62,9 @@ bb.0: liveins: $h0 DBG_VALUE $h0, $noreg, !11, !DIExpression(), debug-location !13 - renamable $s0 = nofpexcept FCVTSHr killed renamable $h0, debug-location !14 + renamable $s0 = nofpexcept FCVTSHr killed renamable $h0, implicit $fpcr, debug-location !14 DBG_VALUE $h0, $noreg, !11, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !13 - nofpexcept FCMPSri killed renamable $s0, implicit-def $nzcv, debug-location !14 + nofpexcept FCMPSri killed renamable $s0, implicit-def $nzcv, implicit $fpcr, debug-location !14 renamable $w0 = CSINCWr $wzr, $wzr, 0, implicit killed $nzcv, debug-location !14 RET undef $lr, implicit killed $w0 ...