Skip to content

Commit 9bff3b7

Browse files
committedMar 31, 2017
[mips][msa] Prevent output operand from commuting for dpadd_[su].df ins
Implementation of TargetInstrInfo::findCommutedOpIndices for the MIPS target, restricting commutativity to the second and third operands only for dpadd_[su].df instructions therein. Prior to this change, there were cases where the vector that is to be added to the dot product of the other two could take a position other than the first one in the instruction, producing incorrect output in the destination vector. Such behavior has been noticed in the two functions generating v2i64 output values so far. Other ones may exhibit such behavior as well, just not for the vector operands which are present in the test at the moment. Tests are altered so that the function's first operand is a constant splat so that it can be loaded with an ldi instruction, since that is the case in which the erroneous instruction operand placement has occurred. We check that the register which is present in the ldi instruction is placed as the first operand in the corresponding dpadd instruction. Patch by Stefan Maksimovic. Differential Revision: https://reviews.llvm.org/D30827 llvm-svn: 299223
1 parent a11dbf2 commit 9bff3b7

File tree

3 files changed

+67
-48
lines changed

3 files changed

+67
-48
lines changed
 

‎llvm/lib/Target/Mips/MipsInstrInfo.cpp

+28
Original file line numberDiff line numberDiff line change
@@ -501,3 +501,31 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
501501
MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
502502
return MIB;
503503
}
504+
505+
/// MIPS override of TargetInstrInfo::findCommutedOpIndices.
///
/// For the MSA DPADD_{U,S}_{H,W,D} opcodes the commutable pair is pinned to
/// operand indices 2 and 3; every other opcode is forwarded unchanged to the
/// generic TargetInstrInfo implementation.
///
/// \param MI         instruction being queried; must not be a bundle.
/// \param SrcOpIdx1  in/out index of the first operand to commute.
/// \param SrcOpIdx2  in/out index of the second operand to commute.
/// \returns false if MI's descriptor is not marked commutable, if
///          fixCommutedOpIndices rejects the request, or if either selected
///          operand is not a register; true for an accepted DPADD pair;
///          otherwise whatever the base-class implementation returns.
bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
506+
unsigned &SrcOpIdx2) const {
507+
// Bundles are not supported by this query; catch misuse in asserts builds.
assert(!MI.isBundle() &&
508+
"TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
509+
510+
// Nothing to do for instructions whose descriptor is not flagged commutable.
const MCInstrDesc &MCID = MI.getDesc();
511+
if (!MCID.isCommutable())
512+
return false;
513+
514+
switch (MI.getOpcode()) {
515+
case Mips::DPADD_U_H:
516+
case Mips::DPADD_U_W:
517+
case Mips::DPADD_U_D:
518+
case Mips::DPADD_S_H:
519+
case Mips::DPADD_S_W:
520+
case Mips::DPADD_S_D: {
521+
// The first operand is both input and output, so it should not commute
// Only indices 2 and 3 are offered as the commutable pair.
// NOTE(review): fixCommutedOpIndices is defined outside this view; it is
// presumed to reconcile the caller-supplied indices with (2, 3) and to
// return false when they are incompatible -- confirm against its docs.
522+
if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3))
523+
return false;
524+
525+
// Refuse to commute unless both chosen operands are registers.
if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
526+
return false;
527+
return true;
528+
}
529+
}
530+
// All remaining opcodes use the target-independent logic.
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
531+
}

‎llvm/lib/Target/Mips/MipsInstrInfo.h

+3
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ class MipsInstrInfo : public MipsGenInstrInfo {
135135
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
136136
MachineBasicBlock::iterator I) const;
137137

138+
/// Reports which two operands of \p MI may be commuted, via \p SrcOpIdx1 and
/// \p SrcOpIdx2. Overrides the generic hook so that, per the commit
/// description, dpadd_[su].df instructions only commute their second and
/// third operands.
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
139+
unsigned &SrcOpIdx2) const override;
140+
138141
protected:
139142
bool isZeroImm(const MachineOperand &op) const;
140143

‎llvm/test/CodeGen/Mips/msa/3r_4r_widen.ll

+36-48
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,16 @@
55
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
66
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
77

8-
@llvm_mips_dpadd_s_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
98
@llvm_mips_dpadd_s_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
109
@llvm_mips_dpadd_s_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
1110
@llvm_mips_dpadd_s_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
1211

1312
define void @llvm_mips_dpadd_s_h_test() nounwind {
1413
entry:
15-
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
16-
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
17-
%2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
18-
%3 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
19-
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_s_h_RES
14+
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
15+
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
16+
%2 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>, <16 x i8> %0, <16 x i8> %1)
17+
store <8 x i16> %2, <8 x i16>* @llvm_mips_dpadd_s_h_RES
2018
ret void
2119
}
2220

@@ -25,23 +23,21 @@ declare <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
2523
; CHECK: llvm_mips_dpadd_s_h_test:
2624
; CHECK: ld.b
2725
; CHECK: ld.b
28-
; CHECK: ld.h
29-
; CHECK: dpadd_s.h
26+
; CHECK: ldi.h [[R1:\$w[0-9]+]],
27+
; CHECK: dpadd_s.h [[R1]],
3028
; CHECK: st.h
3129
; CHECK: .size llvm_mips_dpadd_s_h_test
3230
;
33-
@llvm_mips_dpadd_s_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
3431
@llvm_mips_dpadd_s_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
3532
@llvm_mips_dpadd_s_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
3633
@llvm_mips_dpadd_s_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
3734

3835
define void @llvm_mips_dpadd_s_w_test() nounwind {
3936
entry:
40-
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
41-
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
42-
%2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
43-
%3 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
44-
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_s_w_RES
37+
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
38+
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
39+
%2 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> <i32 4, i32 4, i32 4, i32 4>, <8 x i16> %0, <8 x i16> %1)
40+
store <4 x i32> %2, <4 x i32>* @llvm_mips_dpadd_s_w_RES
4541
ret void
4642
}
4743

@@ -50,48 +46,44 @@ declare <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
5046
; CHECK: llvm_mips_dpadd_s_w_test:
5147
; CHECK: ld.h
5248
; CHECK: ld.h
53-
; CHECK: ld.w
54-
; CHECK: dpadd_s.w
49+
; CHECK: ldi.w [[R1:\$w[0-9]+]],
50+
; CHECK: dpadd_s.w [[R1]],
5551
; CHECK: st.w
5652
; CHECK: .size llvm_mips_dpadd_s_w_test
5753
;
58-
@llvm_mips_dpadd_s_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
5954
@llvm_mips_dpadd_s_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
6055
@llvm_mips_dpadd_s_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
6156
@llvm_mips_dpadd_s_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
6257

6358
define void @llvm_mips_dpadd_s_d_test() nounwind {
6459
entry:
65-
%0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
66-
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
67-
%2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
68-
%3 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
69-
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_s_d_RES
60+
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
61+
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
62+
%2 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> <i64 4, i64 4>, <4 x i32> %0, <4 x i32> %1)
63+
store <2 x i64> %2, <2 x i64>* @llvm_mips_dpadd_s_d_RES
7064
ret void
7165
}
7266

7367
declare <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
7468

7569
; CHECK: llvm_mips_dpadd_s_d_test:
70+
; CHECK: ldi.d [[R1:\$w[0-9]+]],
7671
; CHECK: ld.w
7772
; CHECK: ld.w
78-
; CHECK: ld.d
79-
; CHECK: dpadd_s.d
73+
; CHECK: dpadd_s.d [[R1]],
8074
; CHECK: st.d
8175
; CHECK: .size llvm_mips_dpadd_s_d_test
8276
;
83-
@llvm_mips_dpadd_u_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
8477
@llvm_mips_dpadd_u_h_ARG2 = global <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23>, align 16
8578
@llvm_mips_dpadd_u_h_ARG3 = global <16 x i8> <i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39>, align 16
8679
@llvm_mips_dpadd_u_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
8780

8881
define void @llvm_mips_dpadd_u_h_test() nounwind {
8982
entry:
90-
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
91-
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
92-
%2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
93-
%3 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
94-
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_u_h_RES
83+
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
84+
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
85+
%2 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>, <16 x i8> %0, <16 x i8> %1)
86+
store <8 x i16> %2, <8 x i16>* @llvm_mips_dpadd_u_h_RES
9587
ret void
9688
}
9789

@@ -100,23 +92,21 @@ declare <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
10092
; CHECK: llvm_mips_dpadd_u_h_test:
10193
; CHECK: ld.b
10294
; CHECK: ld.b
103-
; CHECK: ld.h
104-
; CHECK: dpadd_u.h
95+
; CHECK: ldi.h [[R1:\$w[0-9]+]],
96+
; CHECK: dpadd_u.h [[R1]],
10597
; CHECK: st.h
10698
; CHECK: .size llvm_mips_dpadd_u_h_test
10799
;
108-
@llvm_mips_dpadd_u_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
109100
@llvm_mips_dpadd_u_w_ARG2 = global <8 x i16> <i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11>, align 16
110101
@llvm_mips_dpadd_u_w_ARG3 = global <8 x i16> <i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19>, align 16
111102
@llvm_mips_dpadd_u_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
112103

113104
define void @llvm_mips_dpadd_u_w_test() nounwind {
114105
entry:
115-
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
116-
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
117-
%2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
118-
%3 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
119-
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_u_w_RES
106+
%0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
107+
%1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
108+
%2 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> <i32 4, i32 4, i32 4, i32 4>, <8 x i16> %0, <8 x i16> %1)
109+
store <4 x i32> %2, <4 x i32>* @llvm_mips_dpadd_u_w_RES
120110
ret void
121111
}
122112

@@ -125,33 +115,31 @@ declare <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
125115
; CHECK: llvm_mips_dpadd_u_w_test:
126116
; CHECK: ld.h
127117
; CHECK: ld.h
128-
; CHECK: ld.w
129-
; CHECK: dpadd_u.w
118+
; CHECK: ldi.w [[R1:\$w[0-9]+]],
119+
; CHECK: dpadd_u.w [[R1]],
130120
; CHECK: st.w
131121
; CHECK: .size llvm_mips_dpadd_u_w_test
132122
;
133-
@llvm_mips_dpadd_u_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
134123
@llvm_mips_dpadd_u_d_ARG2 = global <4 x i32> <i32 2, i32 3, i32 4, i32 5>, align 16
135124
@llvm_mips_dpadd_u_d_ARG3 = global <4 x i32> <i32 6, i32 7, i32 8, i32 9>, align 16
136125
@llvm_mips_dpadd_u_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
137126

138127
define void @llvm_mips_dpadd_u_d_test() nounwind {
139128
entry:
140-
%0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
141-
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
142-
%2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
143-
%3 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
144-
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_u_d_RES
129+
%0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
130+
%1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
131+
%2 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> <i64 4, i64 4>, <4 x i32> %0, <4 x i32> %1)
132+
store <2 x i64> %2, <2 x i64>* @llvm_mips_dpadd_u_d_RES
145133
ret void
146134
}
147135

148136
declare <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
149137

150138
; CHECK: llvm_mips_dpadd_u_d_test:
139+
; CHECK: ldi.d [[R1:\$w[0-9]+]],
151140
; CHECK: ld.w
152141
; CHECK: ld.w
153-
; CHECK: ld.d
154-
; CHECK: dpadd_u.d
142+
; CHECK: dpadd_u.d [[R1]],
155143
; CHECK: st.d
156144
; CHECK: .size llvm_mips_dpadd_u_d_test
157145
;

0 commit comments

Comments
 (0)
Please sign in to comment.