Index: lib/Target/R600/R600ISelLowering.cpp =================================================================== --- lib/Target/R600/R600ISelLowering.cpp +++ lib/Target/R600/R600ISelLowering.cpp @@ -1392,7 +1392,12 @@ // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, + + // FIXME: This should really check the extload type, but the handling of + // extload vecto parameters seems to be broken. + //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + ISD::LoadExtType Ext = ISD::SEXTLOAD; + SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain, DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, 4); Index: lib/Target/R600/SIISelLowering.h =================================================================== --- lib/Target/R600/SIISelLowering.h +++ lib/Target/R600/SIISelLowering.h @@ -22,7 +22,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, - SDValue Chain, unsigned Offset) const; + SDValue Chain, unsigned Offset, bool Signed) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/R600/SIISelLowering.cpp =================================================================== --- lib/Target/R600/SIISelLowering.cpp +++ lib/Target/R600/SIISelLowering.cpp @@ -224,7 +224,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, SDValue Chain, - unsigned Offset) const { + unsigned Offset, bool Signed) const { MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_ADDRESS); @@ -232,7 +232,7 @@ MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64); SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(Offset, MVT::i64)); - return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr, + return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr, MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, MemVT.getSizeInBits() >> 3); @@ -340,7 +340,8 @@ // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), - 36 + VA.getLocMemOffset()); + 36 + VA.getLocMemOffset(), + Ins[i].Flags.isSExt()); InVals.push_back(Arg); continue; } @@ -533,23 +534,23 @@ switch (IntrinsicID) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case Intrinsic::r600_read_ngroups_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false); case Intrinsic::r600_read_ngroups_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false); case Intrinsic::r600_read_ngroups_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false); case Intrinsic::r600_read_global_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false); case Intrinsic::r600_read_global_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false); case Intrinsic::r600_read_global_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false); case Intrinsic::r600_read_local_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false); case Intrinsic::r600_read_local_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false); case Intrinsic::r600_read_local_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);