Please use GitHub pull requests for new patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/trunk/test/CodeGen/AMDGPU/mad_uint24.ll
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC | ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC | ||||
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC | ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC | ||||
; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN | ; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN1 | ||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN | ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2 | ||||
; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN | ; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2 | ||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone | declare i32 @llvm.r600.read.tidig.x() nounwind readnone | ||||
; FUNC-LABEL: {{^}}u32_mad24: | ; FUNC-LABEL: {{^}}u32_mad24: | ||||
; EG: MULADD_UINT24 | ; EG: MULADD_UINT24 | ||||
; SI: v_mad_u32_u24 | ; SI: v_mad_u32_u24 | ||||
; VI: v_mad_u32_u24 | ; VI: v_mad_u32_u24 | ||||
Show All 11 Lines | |||||
; FUNC-LABEL: {{^}}i16_mad24: | ; FUNC-LABEL: {{^}}i16_mad24: | ||||
; The order of A and B does not matter. | ; The order of A and B does not matter. | ||||
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] | ; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] | ||||
; The result must be sign-extended | ; The result must be sign-extended | ||||
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x | ; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x | ||||
; EG: 16 | ; EG: 16 | ||||
; FIXME: Should be using scalar instructions here. | ; FIXME: SI (GCN1 checks) should be using scalar instructions here, as VI (GCN2 checks) does. | ||||
; GCN: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} | ; GCN1: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} | ||||
; GCN: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16 | ; GCN1: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16 | ||||
; GCN2: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}} | |||||
; GCN2: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}} | |||||
; GCN2: s_sext_i32_i16 s0, [[MAD]] | |||||
; GCN2: v_mov_b32_e32 v0, s0 | |||||
; Kernel under test for the 16-bit multiply-add case.
; Computes (%a * %b) + %c entirely in i16, sign-extends the 16-bit sum to
; i32, and stores that i32 through the addrspace(1) pointer %out.
define amdgpu_kernel void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) { | define amdgpu_kernel void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) { | ||||
entry: | entry: | ||||
; i16 multiply followed by i16 add: the pair the checks above expect to be
; selected as a multiply-add (or as separate scalar mul/add).
%0 = mul i16 %a, %b | %0 = mul i16 %a, %b | ||||
%1 = add i16 %0, %c | %1 = add i16 %0, %c | ||||
; Sign-extend from 16 bits; the checks above look for the matching
; sign-extension of the multiply-add result.
%2 = sext i16 %1 to i32 | %2 = sext i16 %1 to i32 | ||||
store i32 %2, i32 addrspace(1)* %out | store i32 %2, i32 addrspace(1)* %out | ||||
ret void | ret void | ||||
} | } | ||||
; FIXME: Need to handle non-uniform case for function below (load without gep). | ; FIXME: Need to handle non-uniform case for function below (load without gep). | ||||
; FUNC-LABEL: {{^}}i8_mad24: | ; FUNC-LABEL: {{^}}i8_mad24: | ||||
; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] | ; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] | ||||
; The result must be sign-extended | ; The result must be sign-extended | ||||
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x | ; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x | ||||
; EG: 8 | ; EG: 8 | ||||
; GCN: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} | ; GCN1: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}} | ||||
; GCN: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8 | ; GCN1: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8 | ||||
; GCN2: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}} | |||||
; GCN2: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}} | |||||
; GCN2: s_sext_i32_i8 s0, [[MAD]] | |||||
; GCN2: v_mov_b32_e32 v0, s0 | |||||
; Kernel under test for the 8-bit multiply-add case.
; Computes (%a * %b) + %c entirely in i8, sign-extends the 8-bit sum to
; i32, and stores that i32 through the addrspace(1) pointer %out.
define amdgpu_kernel void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) { | define amdgpu_kernel void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) { | ||||
entry: | entry: | ||||
; i8 multiply followed by i8 add: the pair the checks above expect to be
; selected as a multiply-add (or as separate scalar mul/add).
%0 = mul i8 %a, %b | %0 = mul i8 %a, %b | ||||
%1 = add i8 %0, %c | %1 = add i8 %0, %c | ||||
; Sign-extend from 8 bits; the checks above look for the matching
; sign-extension of the multiply-add result.
%2 = sext i8 %1 to i32 | %2 = sext i8 %1 to i32 | ||||
store i32 %2, i32 addrspace(1)* %out | store i32 %2, i32 addrspace(1)* %out | ||||
ret void | ret void | ||||
} | } | ||||
▲ Show 20 Lines • Show All 168 Lines • Show Last 20 Lines |