Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -18,6 +18,7 @@ #include "AMDGPUSubtarget.h" #include "SIISelLowering.h" #include "SIMachineFunctionInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -607,10 +608,16 @@ bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const { if (!N->readMem()) return false; - if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) - if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - N->getMemoryVT().bitsLT(MVT::i32)) + if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) { + if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + return !isa( + GetUnderlyingObject(N->getMemOperand()->getValue(), + CurDAG->getDataLayout())); + + //TODO: Why do we need this? + if (N->getMemoryVT().bitsLT(MVT::i32)) return true; + } return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); } Index: llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -429,13 +429,15 @@ if (Literals[i]->isImm()) { MILit.addImm(Literals[i]->getImm()); } else { - MILit.addImm(0); + MILit.addGlobalAddress(Literals[i]->getGlobal(), + Literals[i]->getOffset()); } if (i + 1 < e) { if (Literals[i + 1]->isImm()) { MILit.addImm(Literals[i + 1]->getImm()); } else { - MILit.addImm(0); + MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), + Literals[i + 1]->getOffset()); } } else MILit.addImm(0); Index: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -269,6 +269,16 @@ TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); break; + case AMDGPU::MOV_IMM_GLOBAL_ADDR: { + //TODO: Perhaps combine this instruction with the next if possible + auto MIB = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV, + MI->getOperand(0).getReg(), + AMDGPU::ALU_LITERAL_X); + int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal); + //TODO: Ugh this is rather ugly + MIB->getOperand(Idx) = MI->getOperand(1); + break; + } case AMDGPU::CONST_COPY: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV, MI->getOperand(0).getReg(), AMDGPU::ALU_CONST); @@ -914,43 +924,10 @@ const DataLayout &DL = DAG.getDataLayout(); const GlobalValue *GV = GSD->getGlobal(); - MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); - Type *EltType = GV->getValueType(); - unsigned Size = DL.getTypeAllocSize(EltType); - unsigned Alignment = DL.getPrefTypeAlignment(EltType); - - MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS); MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); - int FI = FrameInfo->CreateStackObject(Size, Alignment, false); - SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT); - - const GlobalVariable *Var = cast(GV); - if (!Var->hasInitializer()) { - // This has no use, but bugpoint will hit it. - return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT); - } - - const Constant *Init = Var->getInitializer(); - SmallVector WorkList; - - for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(), - E = DAG.getEntryNode()->use_end(); I != E; ++I) { - if (I->getOpcode() != AMDGPUISD::REGISTER_LOAD && I->getOpcode() != ISD::LOAD) - continue; - WorkList.push_back(*I); - } - SDValue Chain = LowerConstantInitializer(Init, GV, InitPtr, DAG.getEntryNode(), DAG); - for (SmallVector::iterator I = WorkList.begin(), - E = WorkList.end(); I != E; ++I) { - SmallVector Ops; - Ops.push_back(Chain); - for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) { - Ops.push_back((*I)->getOperand(i)); - } - DAG.UpdateNodeOperands(*I, Ops); - } - return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); + return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); } SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { @@ -1604,22 +1581,6 @@ SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - // Lower loads constant address space global variable loads - if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && - isa(GetUnderlyingObject( - LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) { - - SDValue Ptr = DAG.getZExtOrTrunc( - LoadNode->getBasePtr(), DL, - getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS)); - Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, - DAG.getConstant(2, DL, MVT::i32)); - return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(), - LoadNode->getChain(), Ptr, - DAG.getTargetConstant(0, DL, MVT::i32), - Op.getOperand(2)); - } - if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { SDValue MergedValues[2] = { scalarizeVectorLoad(LoadNode, DAG), Index: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td @@ -760,6 +760,13 @@ (MOV_IMM_I32 imm:$val) >; +def MOV_IMM_GLOBAL_ADDR : MOV_IMM; +def : Pat < + (AMDGPUconstdata_ptr tglobaladdr:$addr), + (MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr) +>; + + def MOV_IMM_F32 : MOV_IMM; def : Pat < (fpimm:$val), Index: llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll +++ llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace-fail.ll @@ -1,57 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - - -@a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1 - -; FUNC-LABEL: {{^}}test_i8: -; EG: CF_END -; SI: buffer_store_byte -; SI: s_endpgm -define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 { - %arrayidx = getelementptr inbounds [1 x i8], [1 x i8] addrspace(2)* @a, i32 0, i32 %s - %1 = load i8, i8 addrspace(2)* %arrayidx, align 1 - store i8 %1, i8 addrspace(1)* %out - ret void -} - -@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 - -; FUNC-LABEL: {{^}}test_i16: -; EG: CF_END -; SI: buffer_store_short -; SI: s_endpgm -define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 { - %arrayidx = getelementptr inbounds [1 x i16], [1 x i16] addrspace(2)* @b, i32 0, i32 %s - %1 = load i16, i16 addrspace(2)* %arrayidx, align 2 - store i16 %1, i16 addrspace(1)* %out - ret void -} - -%struct.bar = type { float, [5 x i8] } - -; The illegal i8s aren't handled -@struct_bar_gv = internal addrspace(2) constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ] - -; FUNC-LABEL: {{^}}struct_bar_gv_load: -define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) { - %gep = getelementptr inbounds [1 x %struct.bar], [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index - %load = load i8, i8 addrspace(2)* %gep, align 1 - store i8 %load, i8 addrspace(1)* %out, align 1 - ret void -} - - -; The private load isn't scalarzied. -@array_vector_gv = internal addrspace(2) constant [4 x <4 x i32>] [ <4 x i32> , - <4 x i32> , - <4 x i32> , - <4 x i32> ] - -; FUNC-LABEL: {{^}}array_vector_gv_load: -define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) { - %gep = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index - %load = load <4 x i32>, <4 x i32> addrspace(2)* %gep, align 16 - store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16 - ret void -} Index: llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll +++ llvm/trunk/test/CodeGen/AMDGPU/gv-const-addrspace.ll @@ -1,6 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 @@ -10,13 +11,9 @@ ; FUNC-LABEL: {{^}}float: ; GCN: s_load_dword -; EG-DAG: MOV {{\** *}}T2.X -; EG-DAG: MOV {{\** *}}T3.X -; EG-DAG: MOV {{\** *}}T4.X -; EG-DAG: MOV {{\** *}}T5.X -; EG-DAG: MOV {{\** *}}T6.X -; EG: MOVA_INT - +; EG: VTX_READ_32 +; EG: @float_gv +; EG-NOT: MOVA_INT define void @float(float addrspace(1)* %out, i32 %index) { entry: %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index @@ -31,13 +28,9 @@ ; GCN: s_load_dword -; EG-DAG: MOV {{\** *}}T2.X -; EG-DAG: MOV {{\** *}}T3.X -; EG-DAG: MOV {{\** *}}T4.X -; EG-DAG: MOV {{\** *}}T5.X -; EG-DAG: MOV {{\** *}}T6.X -; EG: MOVA_INT - +; EG: VTX_READ_32 +; EG: @i32_gv +; EG-NOT: MOVA_INT define void @i32(i32 addrspace(1)* %out, i32 %index) { entry: %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index @@ -54,6 +47,9 @@ ; FUNC-LABEL: {{^}}struct_foo_gv_load: ; GCN: s_load_dword +; EG: VTX_READ_32 +; EG: @struct_foo_gv +; EG-NOT: MOVA_INT define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index %load = load i32, i32 addrspace(2)* %gep, align 4 @@ -68,6 +64,10 @@ ; FUNC-LABEL: {{^}}array_v1_gv_load: ; GCN: s_load_dword + +; EG: VTX_READ_32 +; EG: @array_v1_gv +; EG-NOT: MOVA_INT define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4 @@ -75,6 +75,11 @@ ret void } +; FUNC-LABEL: {{^}}gv_addressing_in_branch: + +; EG: VTX_READ_32 +; EG: @float_gv +; EG-NOT: MOVA_INT define void @gv_addressing_in_branch(float addrspace(1)* %out, i32 %index, i32 %a) { entry: %0 = icmp eq i32 0, %a