Skip to content

Commit 7b2c98a

Browse files
committed Aug 7, 2018
AMDGPU: Remove broken i16 ternary patterns
Fix up the test to check for the GCN prefix.

These patterns always zero-extend the result even though it might need sign extension. This has been broken since the addition of i16 support. It has popped up in the mad_sat(char) test, since the min(max()) combination is turned into v_med3, resulting in the following (incorrect) sequence:

v_mad_i16 v2, v10, v9, v11
v_med3_i32 v2, v2, v8, v7

Fixes mad_sat(char) piglit on VI.

Differential Revision: https://reviews.llvm.org/D49836
llvm-svn: 339190
1 parent a320e39 commit 7b2c98a

File tree

2 files changed

+90
-14
lines changed

2 files changed

+90
-14
lines changed
 

Diff for: ‎llvm/lib/Target/AMDGPU/VOP3Instructions.td

-11
Original file line numberDiff line numberDiff line change
@@ -461,17 +461,6 @@ def : GCNPat <
461461
(inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
462462
>;
463463

464-
def : GCNPat<
465-
(i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
466-
(inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
467-
>;
468-
469-
def : GCNPat<
470-
(i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
471-
(REG_SEQUENCE VReg_64,
472-
(inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0,
473-
(V_MOV_B32_e32 (i32 0)), sub1)
474-
>;
475464
}
476465

477466
defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;

Diff for: ‎llvm/test/CodeGen/AMDGPU/mad_uint24.ll

+90-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
22
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
3-
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
4-
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
5-
; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC
3+
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
4+
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
5+
; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN
66

77
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
88

@@ -138,3 +138,90 @@ bb18: ; preds = %bb4
138138
store i32 %tmp16, i32 addrspace(1)* %arg
139139
ret void
140140
}
141+
142+
; FUNC-LABEL: {{^}}i8_mad_sat_16:
143+
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
144+
; The result must be sign-extended
145+
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
146+
; EG: 8
147+
; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
148+
; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
149+
; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
150+
; GCN: v_med3_i32 v{{[0-9]}}, [[EXT]],
151+
define amdgpu_kernel void @i8_mad_sat_16(i8 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1, i8 addrspace(1)* %in2, i64 addrspace(5)* %idx) {
152+
entry: ; out[i] = trunc_i8(clamp(sext(in1[i]) * sext(in0[i]) + sext(in2[i]), -128, 127)), computed in i16
153+
%retval.0.i = load i64, i64 addrspace(5)* %idx ; element index i, read through %idx and reused for all four buffers
154+
%arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 %retval.0.i
155+
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 %retval.0.i
156+
%arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %in2, i64 %retval.0.i
157+
%l1 = load i8, i8 addrspace(1)* %arrayidx, align 1
158+
%l2 = load i8, i8 addrspace(1)* %arrayidx2, align 1
159+
%l3 = load i8, i8 addrspace(1)* %arrayidx4, align 1
160+
%conv1.i = sext i8 %l1 to i16 ; all three operands are widened as signed values
161+
%conv3.i = sext i8 %l2 to i16
162+
%conv5.i = sext i8 %l3 to i16
163+
%mul.i.i.i = mul nsw i16 %conv3.i, %conv1.i ; 16-bit multiply ...
164+
%add.i.i = add i16 %mul.i.i.i, %conv5.i ; ... plus addend: the i16 multiply-add the MAD check lines before this function look for
165+
%c4 = icmp sgt i16 %add.i.i, -128 ; signed compare: lower clamp bound
166+
%cond.i.i = select i1 %c4, i16 %add.i.i, i16 -128 ; max(mad, -128)
167+
%c5 = icmp slt i16 %cond.i.i, 127 ; signed compare: upper clamp bound
168+
%cond13.i.i = select i1 %c5, i16 %cond.i.i, i16 127 ; min(..., 127); the GCN check lines expect this min/max pair as v_med3_i32 on a sign-extended MAD result
169+
%conv8.i = trunc i16 %cond13.i.i to i8 ; clamped value is within the signed i8 range, so the truncation keeps it intact
170+
%arrayidx7 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 %retval.0.i
171+
store i8 %conv8.i, i8 addrspace(1)* %arrayidx7, align 1
172+
ret void
173+
}
174+
175+
; FUNC-LABEL: {{^}}i8_mad_32:
176+
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
177+
; The result must be sign-extended
178+
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
179+
; EG: 8
180+
; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
181+
; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
182+
; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
183+
define amdgpu_kernel void @i8_mad_32(i32 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) {
184+
entry: ; *out = sext_i32(sext(a[i]) * sext(b[i]) + sext(c[i])), with the multiply-add done in i16
185+
%retval.0.i = load i64, i64 addrspace(5)* %idx ; element index i, read through %idx and used for the three inputs
186+
%arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i
187+
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i
188+
%arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i
189+
%la = load i8, i8 addrspace(1)* %arrayidx, align 1
190+
%lb = load i8, i8 addrspace(1)* %arrayidx2, align 1
191+
%lc = load i8, i8 addrspace(1)* %arrayidx4, align 1
192+
%exta = sext i8 %la to i16 ; all three operands are widened as signed values
193+
%extb = sext i8 %lb to i16
194+
%extc = sext i8 %lc to i16
195+
%mul = mul i16 %exta, %extb
196+
%mad = add i16 %mul, %extc ; i16 multiply-add result
197+
%mad_ext = sext i16 %mad to i32 ; the extension under test: i16 result sign-extended to i32 (check lines before this function expect v_bfe_i32 ..., 0, 16)
198+
store i32 %mad_ext, i32 addrspace(1)* %out ; stored directly at %out; the output is not indexed by i
199+
ret void
200+
}
201+
202+
; FUNC-LABEL: {{^}}i8_mad_64:
203+
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
204+
; The result must be sign-extended
205+
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
206+
; EG: 8
207+
; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
208+
; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
209+
; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
210+
define amdgpu_kernel void @i8_mad_64(i64 addrspace(1)* %out, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i8 addrspace(1)* %c, i64 addrspace(5)* %idx) {
211+
entry: ; *out = sext_i64(sext(a[i]) * sext(b[i]) + sext(c[i])), with the multiply-add done in i16
212+
%retval.0.i = load i64, i64 addrspace(5)* %idx ; element index i, read through %idx and used for the three inputs
213+
%arrayidx = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 %retval.0.i
214+
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %b, i64 %retval.0.i
215+
%arrayidx4 = getelementptr inbounds i8, i8 addrspace(1)* %c, i64 %retval.0.i
216+
%la = load i8, i8 addrspace(1)* %arrayidx, align 1
217+
%lb = load i8, i8 addrspace(1)* %arrayidx2, align 1
218+
%lc = load i8, i8 addrspace(1)* %arrayidx4, align 1
219+
%exta = sext i8 %la to i16 ; all three operands are widened as signed values
220+
%extb = sext i8 %lb to i16
221+
%extc = sext i8 %lc to i16
222+
%mul = mul i16 %exta, %extb
223+
%mad = add i16 %mul, %extc ; i16 multiply-add result
224+
%mad_ext = sext i16 %mad to i64 ; the extension under test: i16 result sign-extended to i64 (check lines before this function expect v_bfe_i32 ..., 0, 16)
225+
store i64 %mad_ext, i64 addrspace(1)* %out ; stored directly at %out; the output is not indexed by i
226+
ret void
227+
}

0 commit comments

Comments
 (0)
Please sign in to comment.