Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -451,9 +451,6 @@
   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
     setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
 
-  setBooleanContents(ZeroOrNegativeOneBooleanContent);
-  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-
   setSchedulingPreference(Sched::RegPressure);
 
   setJumpIsExpensive(true);
Index: llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -62,6 +62,9 @@
   addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
   addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
 
+  setBooleanContents(ZeroOrNegativeOneBooleanContent);
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
   // Legalize loads and stores to the private address space.
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -160,6 +160,13 @@
 
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
+  // The boolean content concept here is too inflexible. Compares only ever
+  // really produce a 1-bit result. Any copy/extend from these will turn into
+  // a select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose
+  // 0/1, as it's what most targets use.
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrOneBooleanContent);
+
   // We need to custom lower vector stores from local memory
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v3i32, Custom);
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -542,7 +542,7 @@
       RC == &AMDGPU::SReg_32RegClass) {
     if (SrcReg == AMDGPU::SCC) {
       BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
-          .addImm(-1)
+          .addImm(1)
           .addImm(0);
       return;
     }
@@ -840,7 +840,7 @@
     Register SReg = MRI.createVirtualRegister(BoolXExecRC);
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                             : AMDGPU::S_CSELECT_B64), SReg)
-      .addImm(-1)
+      .addImm(1)
       .addImm(0);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
@@ -855,7 +855,7 @@
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                             : AMDGPU::S_CSELECT_B64), SReg)
       .addImm(0)
-      .addImm(-1);
+      .addImm(1);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
       .addReg(FalseReg)
@@ -900,7 +900,7 @@
       .addImm(0);
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                             : AMDGPU::S_CSELECT_B64), SReg)
-      .addImm(-1)
+      .addImm(1)
       .addImm(0);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
@@ -919,7 +919,7 @@
     BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
                             : AMDGPU::S_CSELECT_B64), SReg)
       .addImm(0)
-      .addImm(-1);
+      .addImm(1);
     BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
       .addImm(0)
       .addReg(FalseReg)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
@@ -9,7 +9,7 @@
 ; GCN-NEXT:    ; implicit-def: $vcc_hi
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s0, 0
-; GCN-NEXT:    s_cselect_b32 s0, -1, 0
+; GCN-NEXT:    s_cselect_b32 s0, 1, 0
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
 ; GCN-NEXT:    s_or_b32 s0, s0, s1
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
@@ -8,7 +8,7 @@
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xa
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_cmp_eq_u32 s2, 0
-; GCN-NEXT:    s_cselect_b32 s2, -1, 0
+; GCN-NEXT:    s_cselect_b32 s2, 1, 0
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, s2
 ; GCN-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
Index: llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
+++ llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -13,7 +13,14 @@
 
 ; GCN-LABEL: {{^}}work_item_info:
 ; GCN-NOT: v0
-; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
+; GCN: s_load_dword [[IN:s[0-9]+]]
+; GCN-NOT: v0
+
+; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
+
+; GCN-PROMOTE: v_cmp_eq_u32_e64 vcc, [[IN]], 1
+; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
+
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
 entry:
Index: llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
 
 ; GCN-LABEL: {{^}}add1:
 ; GCN: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
@@ -130,16 +130,14 @@
 ; GCN-LABEL: {{^}}sub_sube_commuted:
 ; GCN-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DAG: buffer_load_dword [[V:v[0-9]+]],
-; GCN: v_subbrev_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
-; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, s{{[0-9]+}}, [[SUBB]]
-; GCN: v_add_i32_e32 {{.*}}, 0x64, [[SUB]]
+; GCN: v_addc_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GCN: v_add_i32_e32 {{.*}}, 0x64, [[ADDC]]
 
 ; GFX9-LABEL: {{^}}sub_sube_commuted:
 ; GFX9-DAG: v_cmp_gt_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9-DAG: global_load_dword [[V:v[0-9]+]],
-; GFX9: v_subbrev_co_u32_e{{32|64}} [[SUBB:v[0-9]+]], {{[^,]+}}, 0, [[V]], [[CC]]
-; GFX9: v_sub_u32_e32 [[SUB:v[0-9]+]], s{{[0-9]+}}, [[SUBB]]
-; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[SUB]]
+; GFX9: v_addc_co_u32_e32 [[ADDC:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, [[CC]]
+; GFX9: v_add_u32_e32 {{.*}}, 0x64, [[ADDC]]
 define amdgpu_kernel void @sub_sube_commuted(i32 addrspace(1)* nocapture %arg, i32 %a) {
 bb:
   %x = tail call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -306,10 +306,10 @@
 
 ; GCN-LABEL: {{^}}bit128_extelt:
 ; GCN-NOT: buffer_
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 1, 0,
+; GCN-DAG: v_cndmask_b32_e{{32|64}} [[V1:v[0-9]+]], 0, 1
 ; GCN-DAG: v_mov_b32_e32 [[LASTIDX:v[0-9]+]], 0x7f
 ; GCN-DAG: v_cmp_ne_u32_e32 [[CL:[^,]+]], s{{[0-9]+}}, [[LASTIDX]]
-; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, v{{[0-9]+}}, [[CL]]
+; GCN-DAG: v_cndmask_b32_e{{32|64}} [[VL:v[0-9]+]], 0, [[V1]], [[CL]]
 ; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[VL]]
 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]
 define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
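
Note on the new SIISelLowering.cpp comment: a minimal sketch, not part of the
patch, of why zext/1 and sext/-1 are equally cheap. Either extension of an i1
compare result lowers to a single conditional select; only the immediate
operand differs, so neither boolean encoding saves an instruction. The
function names below are hypothetical, and the expected scalar instructions
in the comments are illustrative (assuming llc -march=amdgcn with the
operands in SGPRs):

  define i32 @zext_cmp(i32 %a) {
    %cmp = icmp eq i32 %a, 0
    ; selects 1/0, e.g. s_cselect_b32 s0, 1, 0 under ZeroOrOneBooleanContent
    %ext = zext i1 %cmp to i32
    ret i32 %ext
  }

  define i32 @sext_cmp(i32 %a) {
    %cmp = icmp eq i32 %a, 0
    ; selects -1/0, e.g. s_cselect_b32 s0, -1, 0 under the old encoding
    %ext = sext i1 %cmp to i32
    ret i32 %ext
  }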