Skip to content

Commit 7e27885

Browse files
committed Jul 28, 2016
[X86] Remove CustomInserter for FMA3 instructions. Looks like since we got full commuting support for FMAs after this was added, the coalescer can now get this right on its own.
Differential Revision: https://reviews.llvm.org/D22799
llvm-svn: 276987
1 parent 313755d commit 7e27885

File tree

3 files changed

+2
-196
lines changed

3 files changed

+2
-196
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 191 deletions
Original file line number | Diff line number | Diff line change
@@ -24236,164 +24236,6 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
2423624236
return BB;
2423724237
}
2423824238

24239-
// Replace 213-type (isel default) FMA3 instructions with 231-type for
24240-
// accumulator loops. Writing back to the accumulator allows the coalescer
24241-
// to remove extra copies in the loop.
24242-
// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
24243-
MachineBasicBlock *
24244-
X86TargetLowering::emitFMA3Instr(MachineInstr &MI,
24245-
MachineBasicBlock *MBB) const {
24246-
MachineOperand &AddendOp = MI.getOperand(3);
24247-
24248-
// Bail out early if the addend isn't a register - we can't switch these.
24249-
if (!AddendOp.isReg())
24250-
return MBB;
24251-
24252-
MachineFunction &MF = *MBB->getParent();
24253-
MachineRegisterInfo &MRI = MF.getRegInfo();
24254-
24255-
// Check whether the addend is defined by a PHI:
24256-
assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");
24257-
MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg());
24258-
if (!AddendDef.isPHI())
24259-
return MBB;
24260-
24261-
// Look for the following pattern:
24262-
// loop:
24263-
// %addend = phi [%entry, 0], [%loop, %result]
24264-
// ...
24265-
// %result<tied1> = FMA213 %m2<tied0>, %m1, %addend
24266-
24267-
// Replace with:
24268-
// loop:
24269-
// %addend = phi [%entry, 0], [%loop, %result]
24270-
// ...
24271-
// %result<tied1> = FMA231 %addend<tied0>, %m1, %m2
24272-
24273-
for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
24274-
assert(AddendDef.getOperand(i).isReg());
24275-
MachineOperand PHISrcOp = AddendDef.getOperand(i);
24276-
MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg());
24277-
if (&PHISrcInst == &MI) {
24278-
// Found a matching instruction.
24279-
unsigned NewFMAOpc = 0;
24280-
switch (MI.getOpcode()) {
24281-
case X86::VFMADD213PDr:
24282-
NewFMAOpc = X86::VFMADD231PDr;
24283-
break;
24284-
case X86::VFMADD213PSr:
24285-
NewFMAOpc = X86::VFMADD231PSr;
24286-
break;
24287-
case X86::VFMADD213SDr:
24288-
NewFMAOpc = X86::VFMADD231SDr;
24289-
break;
24290-
case X86::VFMADD213SSr:
24291-
NewFMAOpc = X86::VFMADD231SSr;
24292-
break;
24293-
case X86::VFMSUB213PDr:
24294-
NewFMAOpc = X86::VFMSUB231PDr;
24295-
break;
24296-
case X86::VFMSUB213PSr:
24297-
NewFMAOpc = X86::VFMSUB231PSr;
24298-
break;
24299-
case X86::VFMSUB213SDr:
24300-
NewFMAOpc = X86::VFMSUB231SDr;
24301-
break;
24302-
case X86::VFMSUB213SSr:
24303-
NewFMAOpc = X86::VFMSUB231SSr;
24304-
break;
24305-
case X86::VFNMADD213PDr:
24306-
NewFMAOpc = X86::VFNMADD231PDr;
24307-
break;
24308-
case X86::VFNMADD213PSr:
24309-
NewFMAOpc = X86::VFNMADD231PSr;
24310-
break;
24311-
case X86::VFNMADD213SDr:
24312-
NewFMAOpc = X86::VFNMADD231SDr;
24313-
break;
24314-
case X86::VFNMADD213SSr:
24315-
NewFMAOpc = X86::VFNMADD231SSr;
24316-
break;
24317-
case X86::VFNMSUB213PDr:
24318-
NewFMAOpc = X86::VFNMSUB231PDr;
24319-
break;
24320-
case X86::VFNMSUB213PSr:
24321-
NewFMAOpc = X86::VFNMSUB231PSr;
24322-
break;
24323-
case X86::VFNMSUB213SDr:
24324-
NewFMAOpc = X86::VFNMSUB231SDr;
24325-
break;
24326-
case X86::VFNMSUB213SSr:
24327-
NewFMAOpc = X86::VFNMSUB231SSr;
24328-
break;
24329-
case X86::VFMADDSUB213PDr:
24330-
NewFMAOpc = X86::VFMADDSUB231PDr;
24331-
break;
24332-
case X86::VFMADDSUB213PSr:
24333-
NewFMAOpc = X86::VFMADDSUB231PSr;
24334-
break;
24335-
case X86::VFMSUBADD213PDr:
24336-
NewFMAOpc = X86::VFMSUBADD231PDr;
24337-
break;
24338-
case X86::VFMSUBADD213PSr:
24339-
NewFMAOpc = X86::VFMSUBADD231PSr;
24340-
break;
24341-
24342-
case X86::VFMADD213PDYr:
24343-
NewFMAOpc = X86::VFMADD231PDYr;
24344-
break;
24345-
case X86::VFMADD213PSYr:
24346-
NewFMAOpc = X86::VFMADD231PSYr;
24347-
break;
24348-
case X86::VFMSUB213PDYr:
24349-
NewFMAOpc = X86::VFMSUB231PDYr;
24350-
break;
24351-
case X86::VFMSUB213PSYr:
24352-
NewFMAOpc = X86::VFMSUB231PSYr;
24353-
break;
24354-
case X86::VFNMADD213PDYr:
24355-
NewFMAOpc = X86::VFNMADD231PDYr;
24356-
break;
24357-
case X86::VFNMADD213PSYr:
24358-
NewFMAOpc = X86::VFNMADD231PSYr;
24359-
break;
24360-
case X86::VFNMSUB213PDYr:
24361-
NewFMAOpc = X86::VFNMSUB231PDYr;
24362-
break;
24363-
case X86::VFNMSUB213PSYr:
24364-
NewFMAOpc = X86::VFNMSUB231PSYr;
24365-
break;
24366-
case X86::VFMADDSUB213PDYr:
24367-
NewFMAOpc = X86::VFMADDSUB231PDYr;
24368-
break;
24369-
case X86::VFMADDSUB213PSYr:
24370-
NewFMAOpc = X86::VFMADDSUB231PSYr;
24371-
break;
24372-
case X86::VFMSUBADD213PDYr:
24373-
NewFMAOpc = X86::VFMSUBADD231PDYr;
24374-
break;
24375-
case X86::VFMSUBADD213PSYr:
24376-
NewFMAOpc = X86::VFMSUBADD231PSYr;
24377-
break;
24378-
default:
24379-
llvm_unreachable("Unrecognized FMA variant.");
24380-
}
24381-
24382-
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
24383-
MachineInstrBuilder MIB =
24384-
BuildMI(MF, MI.getDebugLoc(), TII.get(NewFMAOpc))
24385-
.addOperand(MI.getOperand(0))
24386-
.addOperand(MI.getOperand(3))
24387-
.addOperand(MI.getOperand(2))
24388-
.addOperand(MI.getOperand(1));
24389-
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
24390-
MI.eraseFromParent();
24391-
}
24392-
}
24393-
24394-
return MBB;
24395-
}
24396-
2439724239
MachineBasicBlock *
2439824240
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2439924241
MachineBasicBlock *BB) const {
@@ -24616,39 +24458,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2461624458
case TargetOpcode::PATCHPOINT:
2461724459
return emitPatchPoint(MI, BB);
2461824460

24619-
case X86::VFMADD213PDr:
24620-
case X86::VFMADD213PSr:
24621-
case X86::VFMADD213SDr:
24622-
case X86::VFMADD213SSr:
24623-
case X86::VFMSUB213PDr:
24624-
case X86::VFMSUB213PSr:
24625-
case X86::VFMSUB213SDr:
24626-
case X86::VFMSUB213SSr:
24627-
case X86::VFNMADD213PDr:
24628-
case X86::VFNMADD213PSr:
24629-
case X86::VFNMADD213SDr:
24630-
case X86::VFNMADD213SSr:
24631-
case X86::VFNMSUB213PDr:
24632-
case X86::VFNMSUB213PSr:
24633-
case X86::VFNMSUB213SDr:
24634-
case X86::VFNMSUB213SSr:
24635-
case X86::VFMADDSUB213PDr:
24636-
case X86::VFMADDSUB213PSr:
24637-
case X86::VFMSUBADD213PDr:
24638-
case X86::VFMSUBADD213PSr:
24639-
case X86::VFMADD213PDYr:
24640-
case X86::VFMADD213PSYr:
24641-
case X86::VFMSUB213PDYr:
24642-
case X86::VFMSUB213PSYr:
24643-
case X86::VFNMADD213PDYr:
24644-
case X86::VFNMADD213PSYr:
24645-
case X86::VFNMSUB213PDYr:
24646-
case X86::VFNMSUB213PSYr:
24647-
case X86::VFMADDSUB213PDYr:
24648-
case X86::VFMADDSUB213PSYr:
24649-
case X86::VFMSUBADD213PDYr:
24650-
case X86::VFMSUBADD213PSYr:
24651-
return emitFMA3Instr(MI, BB);
2465224461
case X86::LCMPXCHG8B_SAVE_EBX:
2465324462
case X86::LCMPXCHG16B_SAVE_RBX: {
2465424463
unsigned BasePtr =

‎llvm/lib/Target/X86/X86InstrFMA.td

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
3939
PatFrag MemFrag128, PatFrag MemFrag256,
4040
ValueType OpVT128, ValueType OpVT256,
4141
SDPatternOperator Op = null_frag> {
42-
let usesCustomInserter = 1 in
4342
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
4443
(ins VR128:$src1, VR128:$src2, VR128:$src3),
4544
!strconcat(OpcodeStr,
@@ -55,7 +54,6 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
5554
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
5655
(MemFrag128 addr:$src3))))]>;
5756

58-
let usesCustomInserter = 1 in
5957
def Yr : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
6058
(ins VR256:$src1, VR256:$src2, VR256:$src3),
6159
!strconcat(OpcodeStr,
@@ -144,7 +142,6 @@ let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
144142
multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
145143
X86MemOperand x86memop, RegisterClass RC,
146144
SDPatternOperator OpNode = null_frag> {
147-
let usesCustomInserter = 1 in
148145
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
149146
(ins RC:$src1, RC:$src2, RC:$src3),
150147
!strconcat(OpcodeStr,

‎llvm/test/CodeGen/X86/fma-do-not-commute.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@ target triple = "x86_64-apple-macosx"
99
; CHECK-NOT: {{.*}}, %xmm0
1010
; %addr lives in rdi.
1111
; %addr2 lives in rsi.
12-
; CHECK: vmovss (%rsi), [[ADDR2:%xmm[0-9]+]]
12+
; CHECK: vmovss (%rdi), [[ADDR:%xmm[0-9]+]]
1313
; The assembly syntax is in the reverse order.
14-
; CHECK: vfmadd231ss (%rdi), [[ADDR2]], %xmm0
14+
; CHECK: vfmadd231ss (%rsi), [[ADDR]], %xmm0
1515
define void @test1(float* %addr, float* %addr2, float %arg) {
1616
entry:
1717
br label %loop

0 commit comments

Comments
 (0)
Please sign in to comment.