Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -587,23 +587,31 @@ unsigned Offset, bool Signed) const { const DataLayout &DL = DAG.getDataLayout(); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); - SDValue PtrOffset = DAG.getUNDEF(PtrVT); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned Align = DL.getABITypeAlignment(Ty); - ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset); + SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align, + MachineMemOperand::MONonTemporal | + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); + + SDValue Val; if (MemVT.isFloatingPoint()) - ExtTy = ISD::EXTLOAD; + Val = DAG.getNode(ISD::FP_EXTEND, SL, VT, Load); + else if (Signed) + Val = DAG.getSExtOrTrunc(Load, SL, VT); + else + Val = DAG.getZExtOrTrunc(Load, SL, VT); - SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset); - return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, - PtrInfo, MemVT, Align, - MachineMemOperand::MONonTemporal | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant); + SDValue Ops[] = { + Val, + Load.getValue(1) + }; + + return DAG.getMergeValues(Ops, SL); } SDValue SITargetLowering::LowerFormalArguments( Index: llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll +++ llvm/trunk/test/CodeGen/AMDGPU/merge-stores.ll @@ -149,12 +149,8 @@ ret void } -; FIXME: Should be able to merge this ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v -; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dwordx4 v ; GCN-AA: buffer_store_dwordx2 ; GCN-AA: buffer_store_dword v