Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -8600,6 +8600,9 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return SDValue(); + switch (N->getOpcode()) { default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); @@ -8625,12 +8628,8 @@ case ISD::UMAX: case ISD::UMIN: case AMDGPUISD::FMIN_LEGACY: - case AMDGPUISD::FMAX_LEGACY: { - if (//DCI.getDAGCombineLevel() >= AfterLegalizeDAG && - getTargetMachine().getOptLevel() > CodeGenOpt::None) - return performMinMaxCombine(N, DCI); - break; - } + case AMDGPUISD::FMAX_LEGACY: + return performMinMaxCombine(N, DCI); case ISD::FMA: return performFMACombine(N, DCI); case ISD::LOAD: { Index: test/CodeGen/AMDGPU/dagcombine-setcc-select.ll =================================================================== --- test/CodeGen/AMDGPU/dagcombine-setcc-select.ll +++ test/CodeGen/AMDGPU/dagcombine-setcc-select.ll @@ -1,19 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -O0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}eq_t: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 2.0, 4.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @eq_t(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -26,18 +19,11 @@ ; GCN-LABEL: {{^}}ne_t: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VFOUR]], [[VTWO]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 4.0, 2.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @ne_t(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -50,18 +36,11 @@ ; GCN-LABEL: {{^}}eq_f: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VFOUR]], [[VTWO]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 4.0, 2.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @eq_f(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -74,18 +53,11 @@ ; GCN-LABEL: {{^}}ne_f: ; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} +; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], 1.0{{$}} ; GCN-NOT: 0xddd5 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp_eq_u32 -; GCN-NOT: v_cndmask_b32 -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC]] +; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], 2.0, 4.0, [[CC]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @ne_f(float %x) { %c1 = fcmp olt float %x, 1.0 @@ -97,18 +69,8 @@ } ; GCN-LABEL: {{^}}different_constants: -; GCN-DAG: s_load_dword [[X:s[0-9]+]] -; GCN-DAG: s_mov_b32 [[SONE:s[0-9]+]], 1.0 -; GCN-DAG: v_mov_b32_e32 [[VONE:v[0-9]+]], [[SONE]] -; GCN-DAG: v_cmp_lt_f32_e{{32|64}} [[CC1:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[VONE]]{{$}} -; GCN-DAG: v_cndmask_b32_e{{32|64}} [[CND1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]] -; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC2:s\[[0-9]+:[0-9]+\]|vcc]], s{{[0-9]+}}, v{{[0-9]+}}{{$}} -; GCN-DAG: s_mov_b32 [[STWO:s[0-9]+]], 2.0 -; GCN-DAG: v_mov_b32_e32 [[VTWO:v[0-9]+]], [[STWO]] -; GCN-DAG: s_mov_b32 [[SFOUR:s[0-9]+]], 4.0 -; GCN-DAG: v_mov_b32_e32 [[VFOUR:v[0-9]+]], [[SFOUR]] -; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[VTWO]], [[VFOUR]], [[CC2]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} +; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 2.0 +; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @different_constants(float %x) { %c1 = fcmp olt float %x, 1.0 %s1 = select i1 %c1, i32 56789, i32 1 Index: test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -26,17 +26,15 @@ ; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]] ; CHECK: buffer_load_dwordx4 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 +; CHECK: s_mov_b32 m0, +; CHECK: v_movrels_b32_e32 ; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]] ; CHECK: [[BB4]]: ; CHECK: buffer_load_dwordx4 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 -; CHECK: v_cndmask_b32_e64 +; CHECK: s_mov_b32 m0, +; CHECK: v_movrels_b32_e32 ; CHECK: [[ENDBB]]: ; CHECK: buffer_store_dword