Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -67,21 +67,27 @@

   SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

+protected:
   SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
                              DAGCombinerInfo &DCI) const;
   SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

-protected:
   static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
   static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);

   virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                      SelectionDAG &DAG) const;

+  /// Return the 64-bit value \p Op as two 32-bit integers, in (Lo, Hi) order.
+  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
+                                              SelectionDAG &DAG) const;
+
   /// \brief Split a vector load into a scalar load of each component.
   SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;

Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -28,7 +28,8 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/DataLayout.h"
+#include "SIInstrInfo.h"

 using namespace llvm;

 static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
@@ -376,6 +377,7 @@
   setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
   setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);

+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::MUL);
@@ -1177,6 +1179,23 @@
   return SDValue();
 }

+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+
+  // View the value as v2i32 so each half is a plain vector element: element 0
+  // is returned as Lo and element 1 as Hi.
+  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
+
+  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
+
+  return std::make_pair(Lo, Hi);
+}
+
 SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
                                                   SelectionDAG &DAG) const {
   LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -2539,6 +2558,48 @@
                       SN->getBasePtr(), SN->getMemOperand());
 }

+/// Split a 64-bit and with a constant RHS into two 32-bit ands of the low and
+/// high halves, recombined with BUILD_VECTOR + BITCAST. Returns an empty
+/// SDValue if \p N is not i64, the RHS is not a constant, or the 64-bit
+/// constant should be kept whole.
+// TODO: Should repeat for other bit ops.
+SDValue AMDGPUTargetLowering::performAndCombine(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
+  if (N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  // Break up 64-bit and of a constant into two 32-bit ands. This will typically
+  // happen anyway for a VALU 64-bit and. This exposes other 32-bit integer
+  // combine opportunities since most 64-bit operations are decomposed this way.
+  // TODO: We won't want this for SALU especially if it is an inline immediate.
+  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHS)
+    return SDValue();
+
+  // If either half of the constant is 0, this is really a 32-bit and, so split
+  // it. Otherwise only split when this is the constant's single use; if the
+  // constant has other users, keep the 64-bit and so the full materialized
+  // constant can be re-used.
+  uint64_t Val = RHS->getZExtValue();
+  if (Lo_32(Val) != 0 && Hi_32(Val) != 0 && !RHS->hasOneUse())
+    return SDValue();
+
+  SDLoc SL(N);
+  SelectionDAG &DAG = DCI.DAG;
+
+  SDValue Lo, Hi;
+  std::tie(Lo, Hi) = split64BitValue(N->getOperand(0), DAG);
+
+  SDValue LoRHS = DAG.getConstant(Lo_32(Val), SL, MVT::i32);
+  SDValue HiRHS = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+
+  SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, LoRHS);
+  SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, HiRHS);
+
+  SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, LoAnd, HiAnd);
+  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+}
+
 SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   if (N->getValueType(0) != MVT::i64)
@@ -2751,6 +2812,13 @@

     return performSrlCombine(N, DCI);
   }
+  case ISD::AND: {
+    // Only run once the combine level has reached AfterLegalizeDAG.
+    if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+      break;
+
+    return performAndCombine(N, DCI);
+  }
   case ISD::MUL:
     return performMulCombine(N, DCI);
   case AMDGPUISD::MUL_I24:
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1915,6 +1915,9 @@
   if (DCI.isBeforeLegalize())
     return SDValue();

+  if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI))
+    return Base;
+
   SelectionDAG &DAG = DCI.DAG;

   // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
Index: test/CodeGen/AMDGPU/and.ll
===================================================================
--- test/CodeGen/AMDGPU/and.ll
+++ test/CodeGen/AMDGPU/and.ll
@@ -177,14 +177,62 @@
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_constant_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
+; FUNC-LABEL: {{^}}s_and_constant_i64:
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
+; SI: buffer_store_dwordx2
 define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
-  %and = and i64 %a, 281474976710655
+  %and = and i64 %a, 549756338176
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_multi_use_constant_i64:
+; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
+; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}
+define void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %and0 = and i64 %a, 549756338176
+  %and1 = and i64 %b, 549756338176
+  store volatile i64 %and0, i64 addrspace(1)* %out
+  store volatile i64 %and1, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_32_bit_constant_i64:
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
+; SI-NOT: and
+; SI: buffer_store_dwordx2
+define void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+  %and = and i64 %a, 1234567
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

+; FUNC-LABEL: {{^}}s_and_multi_use_inline_imm_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 62
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 62
+; SI-NOT: and
+; SI: buffer_store_dwordx2
+define void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 %a, i64 %b, i64 %c) {
+  %shl.a = shl i64 %a, 1
+  %shl.b = shl i64 %b, 1
+  %and0 = and i64 %shl.a, 62
+  %and1 = and i64 %shl.b, 62
+  %add0 = add i64 %and0, %c
+  %add1 = add i64 %and1, %c
+  store volatile i64 %add0, i64 addrspace(1)* %out
+  store volatile i64 %add1, i64 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}v_and_i64:
 ; SI: v_and_b32
 ; SI: v_and_b32
@@ -217,10 +265,8 @@
 }

 ; FUNC-LABEL: {{^}}v_and_constant_i64:
-; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207
-; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
-; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], {{v[0-9]+}}
-; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], {{v[0-9]+}}
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
 ; SI: buffer_store_dwordx2
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64, i64 addrspace(1)* %aptr, align 8
@@ -229,10 +275,54 @@
   ret void
 }

-; FIXME: Should replace and 0
+; FUNC-LABEL: {{^}}v_and_multi_use_constant_i64:
+; SI: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
+; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
+; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207{{$}}
+; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO0]]
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI0]]
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO1]]
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI1]]
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+define void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load volatile i64, i64 addrspace(1)* %aptr
+  %b = load volatile i64, i64 addrspace(1)* %aptr
+  %and0 = and i64 %a, 1231231234567
+  %and1 = and i64 %b, 1231231234567
+  store volatile i64 %and0, i64 addrspace(1)* %out
+  store volatile i64 %and1, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_multi_use_inline_imm_i64:
+; SI: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
+; SI-NOT: and
+; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
+; SI-NOT: and
+; SI: v_and_b32_e32 v[[RESLO0:[0-9]+]], 63, v[[LO0]]
+; SI: v_and_b32_e32 v[[RESLO1:[0-9]+]], 63, v[[LO1]]
+; SI-NOT: and
+; SI: buffer_store_dwordx2
+; SI-NOT: and
+; SI: buffer_store_dwordx2
+define void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load volatile i64, i64 addrspace(1)* %aptr
+  %b = load volatile i64, i64 addrspace(1)* %aptr
+  %and0 = and i64 %a, 63
+  %and1 = and i64 %b, 63
+  store volatile i64 %and0, i64 addrspace(1)* %out
+  store volatile i64 %and1, i64 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant:
-; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-NOT: and
+; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567
@@ -240,10 +330,12 @@
   ret void
 }

-; FIXME: Replace and 0 with mov 0
 ; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
+; SI: buffer_load_dword v{{[0-9]+}}
+; SI-NOT: and
 ; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
-; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 64
@@ -252,15 +344,38 @@
 }

 ; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+; SI: s_load_dword
+; SI-NOT: and
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
+; SI-NOT: and
+; SI: buffer_store_dword
 define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 64
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

+; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64_noshrink:
+; SI: s_lshl_b64 s{{\[}}[[VALLO:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1
+; SI-NOT: and
+; SI: s_and_b32 s{{[0-9]+}}, s[[VALLO]], 64
+; SI-NOT: and
+; SI: s_add_u32
+; SI-NEXT: s_addc_u32
+define void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i64 %b) {
+  %shl = shl i64 %a, 1
+  %and = and i64 %shl, 64
+  %add = add i64 %and, %b
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 1
   store i64 %and, i64 addrspace(1)* %out, align 8
@@ -268,7 +383,14 @@
 }

 ; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0
+
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 4607182418800017408
   store i64 %and, i64 addrspace(1)* %out, align 8
@@ -276,7 +398,14 @@
 }

 ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0
+
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 13830554455654793216
   store i64 %and, i64 addrspace(1)* %out, align 8
@@ -284,47 +413,85 @@
 }

 ; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5
+
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 4602678819172646912
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64:
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5
+
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 13826050856027422720
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 2.0
+; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 4611686018427387904
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -2.0
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 13835058055282163712
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0
+; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64:
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0
+
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 4616189618054758400
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64:
+; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0
+
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 13839561654909534208
   store i64 %and, i64 addrspace(1)* %out, align 8
@@ -335,22 +502,26 @@
 ; Test with the 64-bit integer bitpattern for a 32-bit float in the
 ; low 32-bits, which is not a valid 64-bit inline immmediate.

-; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dword s
+; SI-NOT: and
+; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 1082130432
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FIXME: Copy of -1 register
-; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
-; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}
+; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, -1065353216
   store i64 %and, i64 addrspace(1)* %out, align 8
@@ -358,20 +529,26 @@
 }

 ; Shift into upper 32-bits
-; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 4647714815446351872
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }

-; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
-; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI-NOT: and
+; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
+; SI-NOT: and
+; SI: buffer_store_dwordx2
 define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %and = and i64 %a, 13871086852301127680
   store i64 %and, i64 addrspace(1)* %out, align 8
Index: test/CodeGen/AMDGPU/shift-i64-opts.ll
===================================================================
--- test/CodeGen/AMDGPU/shift-i64-opts.ll
+++ test/CodeGen/AMDGPU/shift-i64-opts.ll
@@ -50,15 +50,18 @@
   ret void
 }

+; Make sure the and of the constant doesn't prevent bfe from forming
+; after 64-bit shift is split.
+
 ; GCN-LABEL: {{^}}lshr_and_i64_35:
-; XGCN: buffer_load_dword [[VAL:v[0-9]+]]
-; XGCN: v_lshlrev_b32_e32 v[[LO:[0-9]+]], 3, [[VAL]]
-; XGCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
-; XGCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: v_bfe_u32 v[[BFE:[0-9]+]], v[[HI]], 8, 23
+; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
 define void @lshr_and_i64_35(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %val = load i64, i64 addrspace(1)* %in
-  %and = and i64 %val, 2147483647 ; 0x7fffffff
-  %shl = lshr i64 %and, 35
+  %and = and i64 %val, 9223372036854775807 ; 0x7fffffffffffffff
+  %shl = lshr i64 %and, 40
   store i64 %shl, i64 addrspace(1)* %out
   ret void
 }
Index: test/CodeGen/AMDGPU/shl.ll
===================================================================
--- test/CodeGen/AMDGPU/shl.ll
+++ test/CodeGen/AMDGPU/shl.ll
@@ -208,4 +208,173 @@
   ret void
 }

+; FUNC-LABEL: {{^}}s_shl_constant_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
+define void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
+  %shl = shl i64 281474976710655, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_shl_constant_i64:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0xab19b207
+; SI-DAG: s_movk_i32 s[[KHI:[0-9]+]], 0x11e{{$}}
+; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
+; SI: buffer_store_dwordx2
+define void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %shl = shl i64 1231231234567, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_shl_i64_32_bit_constant:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x12d687{{$}}
+; SI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0{{$}}
+; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
+define void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %shl = shl i64 1234567, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_shl_inline_imm_64_i64:
+; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, 64, {{v[0-9]+}}
+define void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %shl = shl i64 64, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_64_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 64, s{{[0-9]+}}
+define void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 64, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_1_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1, s{{[0-9]+}}
+define void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 1, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_1.0_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
+define void @s_shl_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 4607182418800017408, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_1.0_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}}
+define void @s_shl_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 13830554455654793216, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_0.5_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0.5, s{{[0-9]+}}
+define void @s_shl_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 4602678819172646912, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_0.5_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -0.5, s{{[0-9]+}}
+define void @s_shl_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 13826050856027422720, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_2.0_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 2.0, s{{[0-9]+}}
+define void @s_shl_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 4611686018427387904, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_2.0_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -2.0, s{{[0-9]+}}
+define void @s_shl_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 13835058055282163712, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_4.0_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 4.0, s{{[0-9]+}}
+define void @s_shl_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 4616189618054758400, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_4.0_i64:
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -4.0, s{{[0-9]+}}
+define void @s_shl_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 13839561654909534208, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+
+; Test with the 64-bit integer bitpattern for a 32-bit float in the
+; low 32-bits, which is not a valid 64-bit inline immediate.
+
+; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_4.0_i64:
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
+define void @s_shl_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 1082130432, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FIXME: Copy of -1 register
+; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_neg_4.0_i64:
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
+; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}, s{{[0-9]+}}
+define void @s_shl_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 -1065353216, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; Shift into upper 32-bits
+; FUNC-LABEL: {{^}}s_shl_inline_high_imm_f32_4.0_i64:
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
+define void @s_shl_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 4647714815446351872, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_shl_inline_high_imm_f32_neg_4.0_i64:
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
+define void @s_shl_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %shl = shl i64 13871086852301127680, %a
+  store i64 %shl, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 attributes #0 = { nounwind readnone }