AMDGPU: split an i64 'or' whose other operand is a zero-extended i32.

Summary of the change carried by this patch (text before the first
"Index:" line is commentary and is skipped by patch tools):

  * lib/Target/AMDGPU/SIISelLowering.cpp
    In the combine that reads operands 0 and 1 of node N (the enclosing
    function header lies outside the hunk), a new case is added for a result
    type of i64:
      (or i64:x, (zero_extend i32:y)) ->
        i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
    If only the left operand is a ZERO_EXTEND the operands are swapped first,
    so the extend is always looked for on the right.  The rewrite only fires
    when the extend's source type is exactly i32.  The 64-bit operand is
    divided with split64BitValue (defined elsewhere in the file; presumably
    it returns the low and high 32-bit halves in that order - confirm against
    its definition).  The new 32-bit OR and the high-half node are both
    queued on the combiner worklist before the BUILD_VECTOR/BITCAST result is
    returned.

  * test/CodeGen/AMDGPU/zext-i64-bit-operand.ll (new file)
    Two functions cover both operand orders of the 'or'.  Each expects exactly
    one v_or_b32 on the low register of the loaded pair, no other 'or', no
    reference to the high register between the loads and the store, and no
    'v_mov_b32 ..., 0' materialising the zero high half of the extend.

NOTE(review): the line breaks and indentation below were reconstructed from a
copy of this patch in which all whitespace runs had been collapsed.  The hunk
headers are unchanged and the line counts were re-checked against them
(6 context + 30 added lines in the first hunk, 41 added lines in the second).
The indentation of the six context lines follows LLVM's two-space style; if
the patch does not apply cleanly, retry with whitespace-insensitive matching
(e.g. "patch -l").

Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2224,6 +2224,36 @@
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
+  EVT VT = N->getValueType(0);
+  if (VT == MVT::i64) {
+    // TODO: This could be a generic combine with a predicate for extracting the
+    // high half of an integer being free.
+
+    // (or i64:x, (zero_extend i32:y)) ->
+    //   i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
+    if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
+        RHS.getOpcode() != ISD::ZERO_EXTEND)
+      std::swap(LHS, RHS);
+
+    if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
+      SDValue ExtSrc = RHS.getOperand(0);
+      EVT SrcVT = ExtSrc.getValueType();
+      if (SrcVT == MVT::i32) {
+        SDLoc SL(N);
+        SDValue LowLHS, HiBits;
+        std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
+        SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
+
+        DCI.AddToWorklist(LowOr.getNode());
+        DCI.AddToWorklist(HiBits.getNode());
+
+        SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+                                  LowOr, HiBits);
+        return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+      }
+    }
+  }
+
   // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
   if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
       RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
Index: test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/zext-i64-bit-operand.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}zext_or_operand_i64:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dword v[[LD32:[0-9]+]]
+; GCN-NOT: or
+; GCN-NOT: v[[HI]]
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
+; GCN-NOT: or
+; GCN-NOT: v[[HI]]
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @zext_or_operand_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+  %ld.64 = load volatile i64, i64 addrspace(1)* %in0
+  %ld.32 = load volatile i32, i32 addrspace(1)* %in1
+  %ext = zext i32 %ld.32 to i64
+  %or = or i64 %ld.64, %ext
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}zext_or_operand_commute_i64:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; GCN: buffer_load_dword v[[LD32:[0-9]+]]
+; GCN-NOT: or
+; GCN-NOT: v[[HI]]
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: v_or_b32_e32 v[[LO]], v[[LD32]], v[[LO]]
+; GCN-NOT: v[[HI]]
+; GCN-NOT: or
+; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @zext_or_operand_commute_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
+  %ld.64 = load volatile i64, i64 addrspace(1)* %in0
+  %ld.32 = load volatile i32, i32 addrspace(1)* %in1
+  %ext = zext i32 %ld.32 to i64
+  %or = or i64 %ext, %ld.64
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}