Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1704,6 +1704,7 @@
   case G_FPTOUI:
   case G_INTTOPTR:
   case G_PTRTOINT:
+  case G_ADDRSPACE_CAST:
     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
   case G_ICMP:
   case G_FCMP:
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -27,6 +27,23 @@
 public:
   AMDGPULegalizerInfo(const GCNSubtarget &ST,
                       const GCNTargetMachine &TM);
+
+  /// Lower an instruction the rule set marked as custom. Returns false for
+  /// opcodes without a custom lowering.
+  bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+                      MachineIRBuilder &MIRBuilder,
+                      GISelChangeObserver &Observer) const override;
+
+  /// Emit code producing the high 32 bits of the flat aperture for the local
+  /// or private segment \p AddrSpace and return the register holding them.
+  unsigned getSegmentAperture(unsigned AddrSpace,
+                              MachineRegisterInfo &MRI,
+                              MachineIRBuilder &MIRBuilder) const;
+
+  /// Expand a scalar G_ADDRSPACE_CAST between the flat address space and the
+  /// local/private segments; no-op casts are turned into a COPY.
+  bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI,
+                             MachineIRBuilder &MIRBuilder) const;
 };
 } // End llvm namespace.
 #endif
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -14,6 +14,9 @@
 #include "AMDGPU.h"
 #include "AMDGPULegalizerInfo.h"
 #include "AMDGPUTargetMachine.h"
+#include "SIMachineFunctionInfo.h"
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -276,6 +279,12 @@
       return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
     });
 
+  if (ST.hasFlatAddressSpace()) {
+    getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
+      .scalarize(0)
+      .custom();
+  }
+
   getActionDefinitionsBuilder({G_LOAD, G_STORE})
     .legalIf([=, &ST](const LegalityQuery &Query) {
       const LLT &Ty0 = Query.Types[0];
@@ -474,3 +483,177 @@
   computeTables();
   verify(*ST.getInstrInfo());
 }
+
+bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
+                                         MachineRegisterInfo &MRI,
+                                         MachineIRBuilder &MIRBuilder,
+                                         GISelChangeObserver &Observer) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_ADDRSPACE_CAST:
+    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
+  default:
+    return false;
+  }
+
+  llvm_unreachable("expected switch to return");
+}
+
+unsigned AMDGPULegalizerInfo::getSegmentAperture(
+  unsigned AS,
+  MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const LLT S32 = LLT::scalar(32);
+
+  assert((AS == AMDGPUAS::LOCAL_ADDRESS ||
+          AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+         "apertures only exist for the local and private segments");
+
+  if (ST.hasApertureRegs()) {
+    // FIXME: Use inline constants (src_{shared, private}_base) instead of
+    // getreg.
+    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
+        AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+        AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
+        AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+        AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+    unsigned Encoding =
+        AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+        Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+        WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+
+    unsigned ShiftAmt = MRI.createGenericVirtualRegister(S32);
+    unsigned ApertureReg = MRI.createGenericVirtualRegister(S32);
+    unsigned GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
+      .addDef(GetReg)
+      .addImm(Encoding);
+    MRI.setType(GetReg, S32);
+
+    MIRBuilder.buildConstant(ShiftAmt, WidthM1 + 1);
+    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
+      .addDef(ApertureReg)
+      .addUse(GetReg)
+      .addUse(ShiftAmt);
+
+    return ApertureReg;
+  }
+
+  unsigned QueuePtr = MRI.createGenericVirtualRegister(
+    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
+
+  // FIXME: Placeholder until we can track the input registers.
+  MIRBuilder.buildConstant(QueuePtr, 0xdeadbeef);
+
+  // Offset into amd_queue_t for group_segment_aperture_base_hi /
+  // private_segment_aperture_base_hi.
+  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+
+  // FIXME: Don't use undef
+  Value *V = UndefValue::get(PointerType::get(
+    Type::getInt8Ty(MF.getFunction().getContext()),
+    AMDGPUAS::CONSTANT_ADDRESS));
+
+  MachinePointerInfo PtrInfo(V, StructOffset);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+    PtrInfo,
+    MachineMemOperand::MOLoad |
+    MachineMemOperand::MODereferenceable |
+    MachineMemOperand::MOInvariant,
+    4,
+    MinAlign(64, StructOffset));
+
+  unsigned LoadResult = MRI.createGenericVirtualRegister(S32);
+  unsigned LoadAddr = AMDGPU::NoRegister;
+
+  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
+  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
+  return LoadResult;
+}
+
+bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
+  MachineInstr &MI, MachineRegisterInfo &MRI,
+  MachineIRBuilder &MIRBuilder) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+
+  MIRBuilder.setInstr(MI);
+
+  unsigned Dst = MI.getOperand(0).getReg();
+  unsigned Src = MI.getOperand(1).getReg();
+
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  unsigned DestAS = DstTy.getAddressSpace();
+  unsigned SrcAS = SrcTy.getAddressSpace();
+
+  // TODO: Avoid reloading from the queue ptr for each cast, or at least each
+  // vector element.
+  assert(!DstTy.isVector());
+
+  const AMDGPUTargetMachine &TM
+    = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
+    MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+    return true;
+  }
+
+  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
+    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
+    unsigned NullVal = TM.getNullPointerValue(DestAS);
+
+    unsigned SegmentNullReg = MRI.createGenericVirtualRegister(DstTy);
+    unsigned FlatNullReg = MRI.createGenericVirtualRegister(SrcTy);
+
+    MIRBuilder.buildConstant(SegmentNullReg, NullVal);
+    MIRBuilder.buildConstant(FlatNullReg, 0);
+
+    unsigned PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
+
+    // Extract low 32-bits of the pointer.
+    MIRBuilder.buildExtract(PtrLo32, Src, 0);
+
+    unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+    MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNullReg);
+    MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNullReg);
+
+    MI.eraseFromParent();
+    return true;
+  }
+
+  assert(SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+         SrcAS == AMDGPUAS::PRIVATE_ADDRESS);
+
+  unsigned FlatNullReg = MRI.createGenericVirtualRegister(DstTy);
+  unsigned SegmentNullReg = MRI.createGenericVirtualRegister(SrcTy);
+  MIRBuilder.buildConstant(SegmentNullReg, TM.getNullPointerValue(SrcAS));
+  MIRBuilder.buildConstant(FlatNullReg, TM.getNullPointerValue(DestAS));
+
+  // The aperture is a property of the segment being cast from; the destination
+  // is always flat on this path.
+  unsigned ApertureReg = getSegmentAperture(SrcAS, MRI, MIRBuilder);
+
+  unsigned CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
+  MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNullReg);
+
+  unsigned BuildPtr = MRI.createGenericVirtualRegister(DstTy);
+
+  // Coerce the type of the low half of the result so we can use merge_values.
+  unsigned SrcAsInt = MRI.createGenericVirtualRegister(LLT::scalar(32));
+  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
+    .addDef(SrcAsInt)
+    .addUse(Src);
+
+  // TODO: Should we allow mismatched types but matching sizes in merges to
+  // avoid the ptrtoint?
+  MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
+  MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNullReg);
+
+  MI.eraseFromParent();
+  return true;
+}
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1217,7 +1217,8 @@
 static bool isFlatGlobalAddrSpace(unsigned AS) {
   return AS == AMDGPUAS::GLOBAL_ADDRESS ||
          AS == AMDGPUAS::FLAT_ADDRESS ||
-         AS == AMDGPUAS::CONSTANT_ADDRESS;
+         AS == AMDGPUAS::CONSTANT_ADDRESS ||
+         AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
 }
 
 bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -0,0 +1,393 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+name: test_addrspacecast_p0_to_p1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p1
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[COPY]](p0)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p1)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p1
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p1) = COPY [[COPY]](p0)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p1)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p1) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p1_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p1_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p1)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p1_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p4
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p0)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p4)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p4
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p4) = COPY [[COPY]](p0)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p4)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p4) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p4_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p4_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p4)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p4_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p4)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    %0:_(p4) = COPY $vgpr0_vgpr1
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p999
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p999
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p999) = COPY [[COPY]](p0)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p999)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p999
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p999) = COPY [[COPY]](p0)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p999)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p999) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p999_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p999_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p999)
+    ; VI: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p999_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p999)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[COPY1]](p0)
+    %0:_(p999) = COPY $vgpr0_vgpr1
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p5_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; VI-LABEL: name: test_addrspacecast_p5_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+    ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+    ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
+    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p5), [[C]]
+    ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5)
+    ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    %0:_(p5) = COPY $vgpr0
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p5
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p5
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; VI: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; VI: $vgpr0 = COPY [[SELECT]](p5)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p5
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p5) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; GFX9: $vgpr0 = COPY [[SELECT]](p5)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p5) = G_ADDRSPACE_CAST %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_addrspacecast_p3_to_p0
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; VI-LABEL: name: test_addrspacecast_p3_to_p0
+    ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+    ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    ; GFX9-LABEL: name: test_addrspacecast_p3_to_p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
+    ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
+    ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
+    %0:_(p3) = COPY $vgpr0
+    %1:_(p0) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_p0_to_p3
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_p0_to_p3
+    ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; VI: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; VI: $vgpr0 = COPY [[SELECT]](p3)
+    ; GFX9-LABEL: name: test_addrspacecast_p0_to_p3
+    ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[COPY]](p0), 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; GFX9: $vgpr0 = COPY [[SELECT]](p3)
+    %0:_(p0) = COPY $vgpr0_vgpr1
+    %1:_(p3) = G_ADDRSPACE_CAST %0
+    $vgpr0 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p0_to_v2p1
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; VI-LABEL: name: test_addrspacecast_v2p0_to_v2p1
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[UV]](p0)
+    ; VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[UV1]](p0)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[COPY1]](p1), [[COPY2]](p1)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p0_to_v2p1
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p1) = COPY [[UV]](p0)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(p1) = COPY [[UV1]](p0)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[COPY1]](p1), [[COPY2]](p1)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+    %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p1>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p1_to_v2p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; VI-LABEL: name: test_addrspacecast_v2p1_to_v2p0
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
+    ; VI: [[COPY1:%[0-9]+]]:_(p0) = COPY [[UV]](p1)
+    ; VI: [[COPY2:%[0-9]+]]:_(p0) = COPY [[UV1]](p1)
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY2]](p0)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p1_to_v2p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>)
+    ; GFX9: [[COPY1:%[0-9]+]]:_(p0) = COPY [[UV]](p1)
+    ; GFX9: [[COPY2:%[0-9]+]]:_(p0) = COPY [[UV1]](p1)
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[COPY1]](p0), [[COPY2]](p0)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p0_to_v2p3
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+    ; VI-LABEL: name: test_addrspacecast_v2p0_to_v2p3
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; VI: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]]
+    ; VI: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; VI: [[C2:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C3:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0
+    ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C3]]
+    ; VI: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C2]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3)
+    ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p0_to_v2p3
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GFX9: [[UV:%[0-9]+]]:_(p0), [[UV1:%[0-9]+]]:_(p0) = G_UNMERGE_VALUES [[COPY]](<2 x p0>)
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT:%[0-9]+]]:_(p3) = G_EXTRACT [[UV]](p0), 0
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p0), [[C1]]
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p3) = G_SELECT [[ICMP]](s1), [[EXTRACT]], [[C]]
+    ; GFX9: [[C2:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C3:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[EXTRACT1:%[0-9]+]]:_(p3) = G_EXTRACT [[UV1]](p0), 0
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p0), [[C3]]
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(p3) = G_SELECT [[ICMP1]](s1), [[EXTRACT1]], [[C2]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[SELECT]](p3), [[SELECT1]](p3)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>)
+    %0:_(<2 x p0>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    %1:_(<2 x p3>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1 = COPY %1
+...
+
+---
+name: test_addrspacecast_v2p3_to_v2p0
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; VI-LABEL: name: test_addrspacecast_v2p3_to_v2p0
+    ; VI: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; VI: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
+    ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C2:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[C2]], [[C3]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
+    ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+    ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
+    ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; VI: [[C4:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; VI: [[C5:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; VI: [[C6:%[0-9]+]]:_(p4) = G_CONSTANT i64 3735928559
+    ; VI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[C6]], [[C7]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
+    ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C4]]
+    ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
+    ; VI: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C5]]
+    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
+    ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    ; GFX9-LABEL: name: test_addrspacecast_v2p3_to_v2p0
+    ; GFX9: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    ; GFX9: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
+    ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
+    ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
+    ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
+    ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
+    ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
+    ; GFX9: [[C3:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
+    ; GFX9: [[C4:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
+    ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
+    ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C5]](s32)
+    ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C3]]
+    ; GFX9: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
+    ; GFX9: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[SHL1]](s32)
+    ; GFX9: [[SELECT1:%[0-9]+]]:_(p0) = G_SELECT [[ICMP1]](s1), [[MV1]], [[C4]]
+    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[SELECT]](p0), [[SELECT1]](p0)
+    ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p0>)
+    %0:_(<2 x p3>) = COPY $vgpr0_vgpr1
+    %1:_(<2 x p0>) = G_ADDRSPACE_CAST %0
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...