diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -56,14 +56,14 @@ HasDummyMaskOpShift = VLMulShift + 3, HasDummyMaskOpMask = 1 << HasDummyMaskOpShift, - // Does this instruction only update element 0 the destination register. - WritesElement0Shift = HasDummyMaskOpShift + 1, - WritesElement0Mask = 1 << WritesElement0Shift, + // Force a tail agnostic policy even if this instruction has a tied destination. + ForceTailAgnosticShift = HasDummyMaskOpShift + 1, + ForceTailAgnosticMask = 1 << ForceTailAgnosticShift, // Does this instruction have a merge operand that must be removed when // converting to MCInst. It will be the first explicit use operand. Used by // RVV Pseudos. - HasMergeOpShift = WritesElement0Shift + 1, + HasMergeOpShift = ForceTailAgnosticShift + 1, HasMergeOpMask = 1 << HasMergeOpShift, // Does this instruction have a SEW operand. It will be the last explicit diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2759,7 +2759,7 @@ static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, int VLIndex, unsigned SEWIndex, - RISCVVLMUL VLMul, bool WritesElement0) { + RISCVVLMUL VLMul, bool ForceTailAgnostic) { MachineFunction &MF = *BB->getParent(); DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -2791,7 +2791,7 @@ // the input is undefined. bool TailAgnostic = true; unsigned UseOpIdx; - if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) { + if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { TailAgnostic = false; // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. const MachineOperand &UseMO = MI.getOperand(UseOpIdx); @@ -2824,11 +2824,11 @@ unsigned NumOperands = MI.getNumExplicitOperands(); int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ?
NumOperands - 2 : -1; unsigned SEWIndex = NumOperands - 1; - bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask; + bool ForceTailAgnostic = TSFlags & RISCVII::ForceTailAgnosticMask; RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >> RISCVII::VLMulShift); - return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0); + return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); } switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -167,8 +167,8 @@ bit HasDummyMask = 0; let TSFlags{11} = HasDummyMask; - bit WritesElement0 = 0; - let TSFlags{12} = WritesElement0; + bit ForceTailAgnostic = false; + let TSFlags{12} = ForceTailAgnostic; bit HasMergeOp = 0; let TSFlags{13} = HasMergeOp; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -133,6 +133,13 @@ insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const override; + + bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const override; + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + protected: const RISCVSubtarget &STI; }; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -890,3 +890,225 @@ RISCVII::MO_CALL)); return It; } + +// clang-format off +#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ + RISCV::PseudoV##OP##_##TYPE##_##LMUL##_COMMUTABLE + +#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ + CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ + case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ + case CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ + case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ + case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \ + case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \ + case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8) + +#define CASE_VFMA_SPLATS(OP) \ + CASE_VFMA_OPCODE_LMULS(OP, VF16): \ + case CASE_VFMA_OPCODE_LMULS(OP, VF32): \ + case CASE_VFMA_OPCODE_LMULS(OP, VF64) +// clang-format on + +bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + const MCInstrDesc &Desc = MI.getDesc(); + if (!Desc.isCommutable()) + return false; + + switch (MI.getOpcode()) { + case CASE_VFMA_SPLATS(FMADD): + case CASE_VFMA_SPLATS(FMSUB): + case CASE_VFMA_SPLATS(FMACC): + case CASE_VFMA_SPLATS(FMSAC): + case CASE_VFMA_SPLATS(FNMADD): + case CASE_VFMA_SPLATS(FNMSUB): + case CASE_VFMA_SPLATS(FNMACC): + case CASE_VFMA_SPLATS(FNMSAC): + case CASE_VFMA_OPCODE_LMULS(FMACC, VV): + case CASE_VFMA_OPCODE_LMULS(FMSAC, VV): + case CASE_VFMA_OPCODE_LMULS(FNMACC, VV): + case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): { + // For these instructions we can only swap operand 1 and operand 3 by + // changing the opcode.
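+ // For example: vfmacc.vv vd, vs1, vs2 computes vd = (vs1 * vs2) + vd, while + // vfmadd.vv vd, vs1, vs2 computes vd = (vd * vs1) + vs2. Swapping the tied + // operand (1) with operand 3 moves the accumulator role to a different + // source, so the opcode must be toggled to keep the arithmetic the same.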
+ unsigned CommutableOpIdx1 = 1; + unsigned CommutableOpIdx2 = 3; + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, + CommutableOpIdx2)) + return false; + return true; + } + case CASE_VFMA_OPCODE_LMULS(FMADD, VV): + case CASE_VFMA_OPCODE_LMULS(FMSUB, VV): + case CASE_VFMA_OPCODE_LMULS(FNMADD, VV): + case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): { + // For these instructions we have more freedom. We can commute with the + // other multiplicand or with the addend/subtrahend/minuend. + + // Any fixed operand must be from source 1, 2 or 3. + if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3) + return false; + if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3) + return false; + + // If both ops are fixed, one must be the tied source. + if (SrcOpIdx1 != CommuteAnyOperandIndex && + SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1) + return false; + + // Look for two different register operands assumed to be commutable + // regardless of the FMA opcode. The FMA opcode is adjusted later if + // needed. + if (SrcOpIdx1 == CommuteAnyOperandIndex || + SrcOpIdx2 == CommuteAnyOperandIndex) { + // At least one of the operands to be commuted is not specified and + // this method is free to choose appropriate commutable operands. + unsigned CommutableOpIdx1 = SrcOpIdx1; + if (SrcOpIdx1 == SrcOpIdx2) { + // Neither operand is fixed. Set one of the commutable + // operands to the tied source. + CommutableOpIdx1 = 1; + } else if (SrcOpIdx1 == CommuteAnyOperandIndex) { + // Only one of the operands is not fixed. + CommutableOpIdx1 = SrcOpIdx2; + } + + // CommutableOpIdx1 is well defined now. Let's choose another commutable + // operand and assign its index to CommutableOpIdx2. + unsigned CommutableOpIdx2; + if (CommutableOpIdx1 != 1) { + // If we haven't already used the tied source, we must use it now. + CommutableOpIdx2 = 1; + } else { + Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg(); + + // The commuted operands should have different registers. + // Otherwise, the commute transformation does not change anything and + // is useless. We use this as a hint to make our decision. + if (Op1Reg != MI.getOperand(2).getReg()) + CommutableOpIdx2 = 2; + else + CommutableOpIdx2 = 3; + } + + // Assign the found pair of commutable indices to SrcOpIdx1 and + // SrcOpIdx2 to return those values.
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, + CommutableOpIdx2)) + return false; + } + + return true; + } + } + + return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); +} + +#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ + case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_COMMUTABLE: \ + Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_COMMUTABLE; \ + break; + +#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ + CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) + +#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ + CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF16) \ + CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF32) \ + CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, VF64) + +MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { + if (NewMI) + return *MI.getParent()->getParent()->CloneMachineInstr(&MI); + return MI; + }; + + switch (MI.getOpcode()) { + case CASE_VFMA_SPLATS(FMACC): + case CASE_VFMA_SPLATS(FMADD): + case CASE_VFMA_SPLATS(FMSAC): + case CASE_VFMA_SPLATS(FMSUB): + case CASE_VFMA_SPLATS(FNMACC): + case CASE_VFMA_SPLATS(FNMADD): + case CASE_VFMA_SPLATS(FNMSAC): + case CASE_VFMA_SPLATS(FNMSUB): + case CASE_VFMA_OPCODE_LMULS(FMACC, VV): + case CASE_VFMA_OPCODE_LMULS(FMSAC, VV): + case CASE_VFMA_OPCODE_LMULS(FNMACC, VV): + case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): { + // It only makes sense to toggle these between clobbering the + // addend/subtrahend/minuend and one of the multiplicands. + assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); + assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index"); + unsigned Opc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB) + CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC) + CASE_VFMA_CHANGE_OPCODE_LMULS(FMACC, FMADD, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV) + } + + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.setDesc(get(Opc)); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } + case CASE_VFMA_OPCODE_LMULS(FMADD, VV): + case CASE_VFMA_OPCODE_LMULS(FMSUB, VV): + case CASE_VFMA_OPCODE_LMULS(FNMADD, VV): + case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): { + assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); + // If one of the operands is the addend, we need to change the opcode. + // Otherwise we're just swapping 2 of the multiplicands.
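+ // For example, for vfmadd.vv vd, vs1, vs2 (vd = (vd * vs1) + vs2), swapping + // the tied operand with the addend vs2 yields the vfmacc.vv form + // vd = (vs1 * vs2) + vd, so the opcode is rewritten before the operands are + // exchanged below.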
+ if (OpIdx1 == 3 || OpIdx2 == 3) { + unsigned Opc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + CASE_VFMA_CHANGE_OPCODE_LMULS(FMADD, FMACC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV) + } + + auto &WorkingMI = cloneIfNew(MI); + WorkingMI.setDesc(get(Opc)); + return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, + OpIdx1, OpIdx2); + } + // Let the default code handle it. + break; + } + } + + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + +#undef CASE_VFMA_CHANGE_OPCODE_SPLATS +#undef CASE_VFMA_CHANGE_OPCODE_LMULS +#undef CASE_VFMA_CHANGE_OPCODE_COMMON +#undef CASE_VFMA_SPLATS +#undef CASE_VFMA_OPCODE_LMULS +#undef CASE_VFMA_OPCODE_COMMON diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -445,10 +445,11 @@ !subst("_B32", "", !subst("_B64", "", !subst("_MASK", "", + !subst("_COMMUTABLE", "", !subst("F16", "F", !subst("F32", "F", !subst("F64", "F", - !subst("Pseudo", "", PseudoInst))))))))))))))))))); + !subst("Pseudo", "", PseudoInst)))))))))))))))))))); } class ToLowerCase { @@ -1761,6 +1762,23 @@ multiclass VPseudoTernaryV_VV_VF_AAXA { defm "" : VPseudoTernaryV_VV; defm "" : VPseudoTernaryV_VF_AAXA; + + foreach m = MxList.m in { + // Add a commutable version for use by IR fma. + // NOTE: We need this because we use a tail undisturbed policy on the + // intrinsic version so we can't commute those instructions since it would + // change which input operand is tied to the destination. That would + // remove user control of the tail elements. + let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in { + def "_VV_" # m.MX # "_COMMUTABLE" : VPseudoTernaryNoMask<m.vrclass, m.vrclass, m.vrclass, "">; + foreach f = FPList.fpinfo in + def "_V" # f.FX # "_" # m.MX # "_COMMUTABLE" : + VPseudoTernaryNoMask<m.vrclass, f.fprclass, m.vrclass, "">; + } + } } multiclass VPseudoTernaryV_VX_VI { @@ -1801,7 +1819,7 @@ multiclass VPseudoReductionV_VS { foreach m = MxList.m in { - let WritesElement0 = 1 in + let ForceTailAgnostic = true in defm _VS : VPseudoTernary; } } @@ -3621,8 +3639,8 @@ def PseudoVMV_X_S # "_" # m.MX: Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, ixlenimm:$sew), []>, RISCVVPseudo; - let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, WritesElement0 = 1, - Constraints = "$rd = $rs1" in + let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, + ForceTailAgnostic = true, Constraints = "$rd = $rs1" in def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd), (ins m.vrclass:$rs1, GPR:$rs2, GPR:$vl, ixlenimm:$sew), @@ -3648,8 +3666,8 @@ (ins m.vrclass:$rs2, ixlenimm:$sew), []>, RISCVVPseudo; - let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, WritesElement0 = 1, - Constraints = "$rd = $rs1" in + let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, + ForceTailAgnostic = true, Constraints = "$rd = $rs1" in def "PseudoVFMV_S_" # f.FX # "_" # m.MX : Pseudo<(outs m.vrclass:$rd), (ins m.vrclass:$rs1, f.fprclass:$rs2, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -516,22 +516,22 @@ // works best with how TwoAddressInstructionPass tries commuting.
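// The _COMMUTABLE pseudos selected below are tail agnostic (ForceTailAgnostic), // which is what lets RISCVInstrInfo commute them freely: with an agnostic tail, // retying a different source to the destination only affects tail elements, and // IR fma makes no promise about those. The intrinsic pseudos keep a tail // undisturbed policy and are never commuted.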
def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd, fvti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVFMADD_VV_"# fvti.LMul.MX) + (!cast<Instruction>("PseudoVFMADD_VV_"# fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))), - (!cast<Instruction>("PseudoVFMSUB_VV_"# fvti.LMul.MX) + (!cast<Instruction>("PseudoVFMSUB_VV_"# fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))), - (!cast<Instruction>("PseudoVFNMADD_VV_"# fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNMADD_VV_"# fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd, fvti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVFNMSUB_VV_"# fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNMSUB_VV_"# fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; @@ -539,35 +539,35 @@ // commutable. def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), fvti.RegClass:$rd, fvti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) + (!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))), - (!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) + (!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), (fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))), - (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1), (fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; // The splat might be negated.
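// In that case the negation folds into the VFNMADD/VFNMSUB opcode below // instead of requiring the splat itself to be negated first.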
def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)), fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))), - (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)), fvti.RegClass:$rd, fvti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX) + (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX # "_COMMUTABLE") fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.SEW)>; } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll @@ -12,9 +12,8 @@ define @vfmadd_vv_nxv1f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu -; CHECK-NEXT: vfmadd.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %vd = call @llvm.fma.v1f16( %va, %vb, %vc) ret %vd @@ -23,7 +22,7 @@ define @vfmadd_vf_nxv1f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmadd_vf_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -37,9 +36,8 @@ define @vfmadd_vv_nxv2f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu -; CHECK-NEXT: vfmadd.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v8, v10, v9 ; CHECK-NEXT: ret %vd = call @llvm.fma.v2f16( %va, %vc, %vb) ret %vd @@ -48,9 +46,8 @@ define @vfmadd_vf_nxv2f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmadd_vf_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu -; CHECK-NEXT: vfmadd.vf v9, fa0, v8 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -63,7 +60,7 @@ define @vfmadd_vv_nxv4f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %vd = call @llvm.fma.v4f16( %vb, %va, %vc) @@ -73,7 +70,7 @@ define @vfmadd_vf_nxv4f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmadd_vf_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -87,9 +84,8 @@ define @vfmadd_vv_nxv8f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfmadd.vv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vv v8, v12, v10 ; CHECK-NEXT: ret %vd = call @llvm.fma.v8f16( %vb, %vc, %va) ret %vd @@ -98,9 +94,8 @@ define @vfmadd_vf_nxv8f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmadd_vf_nxv8f16: ; CHECK: # %bb.0: -;
CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfmadd.vf v10, fa0, v8 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -113,7 +108,7 @@ define @vfmadd_vv_nxv16f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmadd.vv v8, v16, v12 ; CHECK-NEXT: ret %vd = call @llvm.fma.v16f16( %vc, %va, %vb) @@ -123,7 +118,7 @@ define @vfmadd_vf_nxv16f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmadd_vf_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -139,9 +134,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu -; CHECK-NEXT: vfmadd.vv v16, v24, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vfmacc.vv v8, v16, v24 ; CHECK-NEXT: ret %vd = call @llvm.fma.v32f16( %vc, %vb, %va) ret %vd @@ -150,9 +143,8 @@ define @vfmadd_vf_nxv32f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmadd_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu -; CHECK-NEXT: vfmadd.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -165,9 +157,8 @@ define @vfmadd_vv_nxv1f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu -; CHECK-NEXT: vfmadd.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %vd = call @llvm.fma.v1f32( %va, %vb, %vc) ret %vd @@ -176,7 +167,7 @@ define @vfmadd_vf_nxv1f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmadd_vf_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -190,9 +181,8 @@ define @vfmadd_vv_nxv2f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu -; CHECK-NEXT: vfmadd.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v8, v10, v9 ; CHECK-NEXT: ret %vd = call @llvm.fma.v2f32( %va, %vc, %vb) ret %vd @@ -201,9 +191,8 @@ define @vfmadd_vf_nxv2f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmadd_vf_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu -; CHECK-NEXT: vfmadd.vf v9, fa0, v8 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -216,7 +205,7 @@ define @vfmadd_vv_nxv4f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret %vd = call @llvm.fma.v4f32( %vb, %va, %vc) @@ -226,7 +215,7 @@ define 
@vfmadd_vf_nxv4f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmadd_vf_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -240,9 +229,8 @@ define @vfmadd_vv_nxv8f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfmadd.vv v16, v12, v8 -; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vv v8, v16, v12 ; CHECK-NEXT: ret %vd = call @llvm.fma.v8f32( %vb, %vc, %va) ret %vd @@ -251,9 +239,8 @@ define @vfmadd_vf_nxv8f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmadd_vf_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfmadd.vf v12, fa0, v8 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -268,7 +255,6 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu ; CHECK-NEXT: vfmadd.vv v8, v24, v16 ; CHECK-NEXT: ret %vd = call @llvm.fma.v16f32( %vc, %va, %vb) @@ -278,7 +264,7 @@ define @vfmadd_vf_nxv16f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmadd_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -292,9 +278,8 @@ define @vfmadd_vv_nxv1f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu -; CHECK-NEXT: vfmadd.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %vd = call @llvm.fma.v1f64( %va, %vb, %vc) ret %vd @@ -303,7 +288,7 @@ define @vfmadd_vf_nxv1f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmadd_vf_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -317,9 +302,8 @@ define @vfmadd_vv_nxv2f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu -; CHECK-NEXT: vfmadd.vv v12, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfmadd.vv v8, v12, v10 ; CHECK-NEXT: ret %vd = call @llvm.fma.v2f64( %va, %vc, %vb) ret %vd @@ -328,9 +312,8 @@ define @vfmadd_vf_nxv2f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmadd_vf_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu -; CHECK-NEXT: vfmadd.vf v10, fa0, v8 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -343,7 +326,7 @@ define @vfmadd_vv_nxv4f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret %vd = call @llvm.fma.v4f64( %vb, %va, %vc) @@ -353,7 +336,7 @@ define @vfmadd_vf_nxv4f64( 
%va, %vb, double %c) { ; CHECK-LABEL: vfmadd_vf_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -369,9 +352,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfmadd.vv v24, v16, v8 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfmacc.vv v8, v16, v24 ; CHECK-NEXT: ret %vd = call @llvm.fma.v8f64( %vb, %vc, %va) ret %vd @@ -380,9 +361,8 @@ define @vfmadd_vf_nxv8f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmadd_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfmadd.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll @@ -12,9 +12,8 @@ define @vfmsub_vv_nxv1f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu -; CHECK-NEXT: vfmsub.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vc %vd = call @llvm.fma.v1f16( %va, %vb, %neg) @@ -24,7 +23,7 @@ define @vfmsub_vf_nxv1f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmsub_vf_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -39,9 +38,8 @@ define @vfmsub_vv_nxv2f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu -; CHECK-NEXT: vfmsub.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v2f16( %va, %vc, %neg) @@ -51,9 +49,8 @@ define @vfmsub_vf_nxv2f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmsub_vf_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu -; CHECK-NEXT: vfmsub.vf v9, fa0, v8 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -67,7 +64,7 @@ define @vfmsub_vv_nxv4f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vc @@ -78,7 +75,7 @@ define @vfmsub_vf_nxv4f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmsub_vf_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -93,9 +90,8 @@ define @vfmsub_vv_nxv8f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfmsub.vv v12, v10, v8 
-; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vv v8, v12, v10 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v8f16( %vb, %vc, %neg) @@ -105,9 +101,8 @@ define @vfmsub_vf_nxv8f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmsub_vf_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfmsub.vf v10, fa0, v8 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -121,7 +116,7 @@ define @vfmsub_vv_nxv16f16( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmsub.vv v8, v16, v12 ; CHECK-NEXT: ret %neg = fneg %vb @@ -132,7 +127,7 @@ define @vfmsub_vf_nxv16f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmsub_vf_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -149,9 +144,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu -; CHECK-NEXT: vfmsub.vv v16, v24, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vfmsac.vv v8, v16, v24 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v32f16( %vc, %vb, %neg) @@ -161,9 +154,8 @@ define @vfmsub_vf_nxv32f16( %va, %vb, half %c) { ; CHECK-LABEL: vfmsub_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu -; CHECK-NEXT: vfmsub.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -177,9 +169,8 @@ define @vfmsub_vv_nxv1f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu -; CHECK-NEXT: vfmsub.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vc %vd = call @llvm.fma.v1f32( %va, %vb, %neg) @@ -189,7 +180,7 @@ define @vfmsub_vf_nxv1f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmsub_vf_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -204,9 +195,8 @@ define @vfmsub_vv_nxv2f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu -; CHECK-NEXT: vfmsub.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v2f32( %va, %vc, %neg) @@ -216,9 +206,8 @@ define @vfmsub_vf_nxv2f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmsub_vf_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu -; CHECK-NEXT: vfmsub.vf v9, fa0, v8 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -232,7 +221,7 @@ 
define @vfmsub_vv_nxv4f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmsub.vv v8, v10, v12 ; CHECK-NEXT: ret %neg = fneg %vc @@ -243,7 +232,7 @@ define @vfmsub_vf_nxv4f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmsub_vf_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -258,9 +247,8 @@ define @vfmsub_vv_nxv8f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfmsub.vv v16, v12, v8 -; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vv v8, v16, v12 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v8f32( %vb, %vc, %neg) @@ -270,9 +258,8 @@ define @vfmsub_vf_nxv8f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmsub_vf_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfmsub.vf v12, fa0, v8 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -288,7 +275,6 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu ; CHECK-NEXT: vfmsub.vv v8, v24, v16 ; CHECK-NEXT: ret %neg = fneg %vb @@ -299,7 +285,7 @@ define @vfmsub_vf_nxv16f32( %va, %vb, float %c) { ; CHECK-LABEL: vfmsub_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -314,9 +300,8 @@ define @vfmsub_vv_nxv1f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu -; CHECK-NEXT: vfmsub.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vc %vd = call @llvm.fma.v1f64( %va, %vb, %neg) @@ -326,7 +311,7 @@ define @vfmsub_vf_nxv1f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmsub_vf_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -341,9 +326,8 @@ define @vfmsub_vv_nxv2f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu -; CHECK-NEXT: vfmsub.vv v12, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfmsub.vv v8, v12, v10 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v2f64( %va, %vc, %neg) @@ -353,9 +337,8 @@ define @vfmsub_vf_nxv2f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmsub_vf_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu -; CHECK-NEXT: vfmsub.vf v10, fa0, v8 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -369,7 +352,7 @@ define @vfmsub_vv_nxv4f64( %va, %vb, %vc) { ; 
CHECK-LABEL: vfmsub_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmsub.vv v8, v12, v16 ; CHECK-NEXT: ret %neg = fneg %vc @@ -380,7 +363,7 @@ define @vfmsub_vf_nxv4f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmsub_vf_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -397,9 +380,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfmsub.vv v24, v16, v8 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfmsac.vv v8, v16, v24 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v8f64( %vb, %vc, %neg) @@ -409,9 +390,8 @@ define @vfmsub_vf_nxv8f64( %va, %vb, double %c) { ; CHECK-LABEL: vfmsub_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfmsub.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll @@ -12,9 +12,8 @@ define @vfnmsub_vv_nxv1f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu -; CHECK-NEXT: vfnmadd.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %va %neg2 = fneg %vc @@ -25,7 +24,7 @@ define @vfnmsub_vf_nxv1f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -41,9 +40,8 @@ define @vfnmsub_vv_nxv2f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu -; CHECK-NEXT: vfnmadd.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfnmadd.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %va %neg2 = fneg %vb @@ -54,7 +52,7 @@ define @vfnmsub_vf_nxv2f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -70,7 +68,7 @@ define @vfnmsub_vv_nxv4f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vb @@ -82,7 +80,7 @@ define @vfnmsub_vf_nxv4f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -98,9 +96,8 @@ define @vfnmsub_vv_nxv8f16( %va, %vb, %vc) { ; CHECK-LABEL: 
vfnmsub_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfnmadd.vv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vv v8, v12, v10 ; CHECK-NEXT: ret %neg = fneg %vb %neg2 = fneg %va @@ -111,9 +108,8 @@ define @vfnmsub_vf_nxv8f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfnmadd.vf v10, fa0, v8 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -128,7 +124,7 @@ define @vfnmsub_vv_nxv16f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfnmadd.vv v8, v16, v12 ; CHECK-NEXT: ret %neg = fneg %vc @@ -140,7 +136,7 @@ define @vfnmsub_vf_nxv16f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -158,7 +154,6 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu ; CHECK-NEXT: vfnmadd.vv v8, v24, v16 ; CHECK-NEXT: ret %neg = fneg %vc @@ -170,9 +165,8 @@ define @vfnmsub_vf_nxv32f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu -; CHECK-NEXT: vfnmadd.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -187,7 +181,7 @@ define @vfnmsub_vv_nxv1f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vb @@ -199,7 +193,7 @@ define @vfnmsub_vf_nxv1f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -215,7 +209,7 @@ define @vfnmsub_vv_nxv2f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu ; CHECK-NEXT: vfnmadd.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %vc @@ -227,7 +221,7 @@ define @vfnmsub_vf_nxv2f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -243,9 +237,8 @@ define @vfnmsub_vv_nxv4f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu -; CHECK-NEXT: vfnmadd.vv v10, v8, v12 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret %neg = fneg %va 
%neg2 = fneg %vc @@ -256,7 +249,7 @@ define @vfnmsub_vf_nxv4f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -272,9 +265,8 @@ define @vfnmsub_vv_nxv8f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfnmadd.vv v12, v16, v8 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vv v8, v16, v12 ; CHECK-NEXT: ret %neg = fneg %vc %neg2 = fneg %va @@ -285,9 +277,8 @@ define @vfnmsub_vf_nxv8f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfnmadd.vf v12, fa0, v8 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -304,9 +295,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu -; CHECK-NEXT: vfnmadd.vv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfnmadd.vv v8, v24, v16 ; CHECK-NEXT: ret %neg = fneg %va %neg2 = fneg %vb @@ -317,7 +306,7 @@ define @vfnmsub_vf_nxv16f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -333,9 +322,8 @@ define @vfnmsub_vv_nxv1f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu -; CHECK-NEXT: vfnmadd.vv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfnmacc.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %vb %neg2 = fneg %va @@ -346,7 +334,7 @@ define @vfnmsub_vf_nxv1f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -362,9 +350,8 @@ define @vfnmsub_vv_nxv2f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu -; CHECK-NEXT: vfnmadd.vv v12, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfnmadd.vv v8, v12, v10 ; CHECK-NEXT: ret %neg = fneg %va %neg2 = fneg %vb @@ -375,7 +362,7 @@ define @vfnmsub_vf_nxv2f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -391,7 +378,7 @@ define @vfnmsub_vv_nxv4f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret %neg = fneg %vb @@ -403,7 +390,7 @@ define @vfnmsub_vf_nxv4f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv4f64: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -421,9 +408,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfnmadd.vv v24, v16, v8 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfnmacc.vv v8, v16, v24 ; CHECK-NEXT: ret %neg = fneg %vb %neg2 = fneg %va @@ -434,9 +419,8 @@ define @vfnmsub_vf_nxv8f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfnmadd.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll @@ -12,9 +12,8 @@ define @vfnmsub_vv_nxv1f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu -; CHECK-NEXT: vfnmsub.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v1f16( %neg, %vb, %vc) @@ -24,7 +23,7 @@ define @vfnmsub_vf_nxv1f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -39,9 +38,8 @@ define @vfnmsub_vv_nxv2f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu -; CHECK-NEXT: vfnmsub.vv v10, v8, v9 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfnmsub.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v2f16( %neg, %vc, %vb) @@ -51,7 +49,7 @@ define @vfnmsub_vf_nxv2f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -66,7 +64,7 @@ define @vfnmsub_vv_nxv4f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfnmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vb @@ -77,7 +75,7 @@ define @vfnmsub_vf_nxv4f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -92,9 +90,8 @@ define @vfnmsub_vv_nxv8f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfnmsub.vv v12, v10, v8 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vv v8, v12, v10 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call 
@llvm.fma.v8f16( %neg, %vc, %va) @@ -104,9 +101,8 @@ define @vfnmsub_vf_nxv8f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu -; CHECK-NEXT: vfnmsub.vf v10, fa0, v8 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e16,m2,ta,mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -120,7 +116,7 @@ define @vfnmsub_vv_nxv16f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfnmsub.vv v8, v16, v12 ; CHECK-NEXT: ret %neg = fneg %vc @@ -131,7 +127,7 @@ define @vfnmsub_vf_nxv16f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv16f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e16,m4,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 @@ -148,7 +144,6 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu ; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu ; CHECK-NEXT: vfnmsub.vv v8, v24, v16 ; CHECK-NEXT: ret %neg = fneg %vc @@ -159,9 +154,8 @@ define @vfnmsub_vf_nxv32f16( %va, %vb, half %c) { ; CHECK-LABEL: vfnmsub_vf_nxv32f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu -; CHECK-NEXT: vfnmsub.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, half %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -175,7 +169,7 @@ define @vfnmsub_vv_nxv1f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfnmsub.vv v8, v9, v10 ; CHECK-NEXT: ret %neg = fneg %vb @@ -186,7 +180,7 @@ define @vfnmsub_vf_nxv1f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -201,7 +195,7 @@ define @vfnmsub_vv_nxv2f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu ; CHECK-NEXT: vfnmsub.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %vc @@ -212,7 +206,7 @@ define @vfnmsub_vf_nxv2f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -227,9 +221,8 @@ define @vfnmsub_vv_nxv4f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu -; CHECK-NEXT: vfnmsub.vv v10, v8, v12 -; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v8, v10, v12 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v4f32( %vb, %neg, %vc) @@ -239,7 +232,7 @@ define @vfnmsub_vf_nxv4f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; 
CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -254,9 +247,8 @@ define @vfnmsub_vv_nxv8f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfnmsub.vv v12, v16, v8 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vv v8, v16, v12 ; CHECK-NEXT: ret %neg = fneg %vc %vd = call @llvm.fma.v8f32( %vb, %neg, %va) @@ -266,9 +258,8 @@ define @vfnmsub_vf_nxv8f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu -; CHECK-NEXT: vfnmsub.vf v12, fa0, v8 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e32,m4,ta,mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -284,9 +275,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu -; CHECK-NEXT: vfnmsub.vv v24, v8, v16 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfnmsub.vv v8, v24, v16 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v16f32( %vc, %neg, %vb) @@ -296,7 +285,7 @@ define @vfnmsub_vf_nxv16f32( %va, %vb, float %c) { ; CHECK-LABEL: vfnmsub_vf_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, float %c, i32 0 @@ -311,9 +300,8 @@ define @vfnmsub_vv_nxv1f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu -; CHECK-NEXT: vfnmsub.vv v10, v9, v8 -; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; CHECK-NEXT: vfnmsac.vv v8, v10, v9 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v1f64( %vc, %neg, %va) @@ -323,7 +311,7 @@ define @vfnmsub_vf_nxv1f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -338,9 +326,8 @@ define @vfnmsub_vv_nxv2f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu -; CHECK-NEXT: vfnmsub.vv v12, v8, v10 -; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu +; CHECK-NEXT: vfnmsub.vv v8, v12, v10 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v2f64( %neg, %vc, %vb) @@ -350,7 +337,7 @@ define @vfnmsub_vf_nxv2f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -365,7 +352,7 @@ define @vfnmsub_vv_nxv4f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfnmsub.vv v8, v12, v16 ; CHECK-NEXT: ret %neg = fneg %vb @@ -376,7 +363,7 @@ define @vfnmsub_vf_nxv4f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfnmsub.vf 
v8, fa0, v12 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 @@ -393,9 +380,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfnmsub.vv v24, v16, v8 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfnmsac.vv v8, v16, v24 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v8f64( %neg, %vc, %va) @@ -405,9 +390,8 @@ define @vfnmsub_vf_nxv8f64( %va, %vb, double %c) { ; CHECK-LABEL: vfnmsub_vf_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu -; CHECK-NEXT: vfnmsub.vf v16, fa0, v8 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v16 ; CHECK-NEXT: ret %head = insertelement undef, double %c, i32 0 %splat = shufflevector %head, undef, zeroinitializer
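A minimal reproducer in the style of the updated tests (an assumed illustration written against current intrinsic mangling, @llvm.fma.nxv2f32, and the usual v8/v9/v10 argument registers; it is not part of the patch): define <vscale x 2 x float> @fma_keeps_acc(<vscale x 2 x float> %acc, <vscale x 2 x float> %x, <vscale x 2 x float> %y) { ; With the commutable pseudos this can now select ;   vsetvli a0, zero, e32,m1,ta,mu ;   vfmacc.vv v8, v9, v10 ; leaving %acc in v8, instead of computing into another register under a ; tu policy and copying the result back with vmv1r.v. %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x float> %acc) ret <vscale x 2 x float> %r } declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)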