Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1431,6 +1431,14 @@
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
 
 //===----------------------------------------------------------------------===//
+// GFX10 Intrinsics
+//===----------------------------------------------------------------------===//
+
+def int_amdgcn_s_get_waveid_in_workgroup :
+  GCCBuiltin<"__builtin_amdgcn_s_get_waveid_in_workgroup">,
+  Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+
+//===----------------------------------------------------------------------===//
 // Deep learning intrinsics.
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -125,10 +125,10 @@
   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
-                   SDValue &TFE) const;
+                   SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
-                         SDValue &SLC, SDValue &TFE) const;
+                         SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset,
                          SDValue &SLC) const;
@@ -141,19 +141,19 @@
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
-                         SDValue &TFE) const;
+                         SDValue &TFE, SDValue &DLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &SLC) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;
 
-  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
-  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr,
                               SDValue &Offset, SDValue &SLC) const;
 
   template <bool IsSigned>
-  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
+  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset, SDValue &SLC) const;
 
   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
@@ -1221,7 +1221,7 @@
                                      SDValue &Offset, SDValue &Offen,
                                      SDValue &Idxen, SDValue &Addr64,
                                      SDValue &GLC, SDValue &SLC,
-                                     SDValue &TFE) const {
+                                     SDValue &TFE, SDValue &DLC) const {
   // Subtarget prefers to use flat instruction
   if (Subtarget->useFlatForGlobal())
     return false;
@@ -1233,6 +1234,7 @@
   if (!SLC.getNode())
     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
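A note on the new intrinsic above: `int_amdgcn_s_get_waveid_in_workgroup` is exposed to source code through the `GCCBuiltin` binding. The sketch below is a hypothetical HIP-style use, not part of this patch; the kernel and buffer names are illustrative, and it assumes a GFX10 target where the builtin is available. `IntrReadMem` (rather than `IntrNoMem`) keeps the read from being hoisted or merged as if it were a pure constant.

```cpp
// Hypothetical usage sketch (assumes HIP-style launch bounds/indices).
__global__ void tag_by_wave(unsigned *out, unsigned n) {
  unsigned id = blockIdx.x * blockDim.x + threadIdx.x;
  if (id < n)
    out[id] = __builtin_amdgcn_s_get_waveid_in_workgroup(); // wave index within the workgroup
}
```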
@@ -1311,7 +1312,8 @@
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &Offset, SDValue &GLC,
-                                           SDValue &SLC, SDValue &TFE) const {
+                                           SDValue &SLC, SDValue &TFE,
+                                           SDValue &DLC) const {
   SDValue Ptr, Offen, Idxen, Addr64;
 
   // addr64 bit was removed for volcanic islands.
@@ -1319,7 +1321,7 @@
     return false;
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   GLC, SLC, TFE))
+                   GLC, SLC, TFE, DLC))
     return false;
 
   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1341,9 +1343,9 @@
                                            SDValue &Offset,
                                            SDValue &SLC) const {
   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
-  SDValue GLC, TFE;
+  SDValue GLC, TFE, DLC;
 
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1468,13 +1470,13 @@
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &SOffset, SDValue &Offset,
                                            SDValue &GLC, SDValue &SLC,
-                                           SDValue &TFE) const {
+                                           SDValue &TFE, SDValue &DLC) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   GLC, SLC, TFE))
+                   GLC, SLC, TFE, DLC))
     return false;
 
   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1496,57 +1498,42 @@
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset
                                            ) const {
-  SDValue GLC, SLC, TFE;
+  SDValue GLC, SLC, TFE, DLC;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset,
                                            SDValue &SLC) const {
-  SDValue GLC, TFE;
+  SDValue GLC, TFE, DLC;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
 }
 
 template <bool IsSigned>
-bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  int64_t OffsetVal = 0;
-
-  if (Subtarget->hasFlatInstOffsets() &&
-      CurDAG->isBaseWithConstantOffset(Addr)) {
-    SDValue N0 = Addr.getOperand(0);
-    SDValue N1 = Addr.getOperand(1);
-    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
-    if ((IsSigned && isInt<13>(COffsetVal)) ||
-        (!IsSigned && isUInt<12>(COffsetVal))) {
-      Addr = N0;
-      OffsetVal = COffsetVal;
-    }
-  }
-
-  VAddr = Addr;
-  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
-  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
-
-  return true;
+  return static_cast<const AMDGPUTargetLowering *>(getTargetLowering())->
+    SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC);
 }
 
-bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N,
+                                          SDValue Addr,
                                           SDValue &VAddr,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
-  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
+  return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC);
 }
 
-bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
+bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
+                                                SDValue Addr,
                                                 SDValue &VAddr,
                                                 SDValue &Offset,
                                                 SDValue &SLC) const {
-  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
+  return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC);
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -323,6 +323,10 @@
   }
 
   AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
+
+  bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
+                        SDValue Addr, SDValue &VAddr, SDValue &Offset,
+                        SDValue &SLC) const;
 };
 
 namespace AMDGPUISD {
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2886,6 +2886,61 @@
   return true;
 }
 
+// Find a load or store from corresponding pattern root.
+// Roots may be build_vector, bitconvert or their combinations.
+static MemSDNode* findMemSDNode(SDNode *N) {
+  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
+  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
+    return MN;
+  assert(isa<BuildVectorSDNode>(N));
+  for (SDValue V : N->op_values())
+    if (MemSDNode *MN =
+          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
+      return MN;
+  llvm_unreachable("cannot find MemSDNode in the pattern!");
+}
+
+bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned,
+                                            SelectionDAG &DAG,
+                                            SDNode *N,
+                                            SDValue Addr,
+                                            SDValue &VAddr,
+                                            SDValue &Offset,
+                                            SDValue &SLC) const {
+  const GCNSubtarget &ST =
+      DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
+  int64_t OffsetVal = 0;
+
+  if (ST.hasFlatInstOffsets() &&
+      (!ST.hasFlatSegmentOffsetBug() ||
+       findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
+      DAG.isBaseWithConstantOffset(Addr)) {
+    SDValue N0 = Addr.getOperand(0);
+    SDValue N1 = Addr.getOperand(1);
+    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+      if ((IsSigned && isInt<12>(COffsetVal)) ||
+          (!IsSigned && isUInt<11>(COffsetVal))) {
+        Addr = N0;
+        OffsetVal = COffsetVal;
+      }
+    } else {
+      if ((IsSigned && isInt<13>(COffsetVal)) ||
+          (!IsSigned && isUInt<12>(COffsetVal))) {
+        Addr = N0;
+        OffsetVal = COffsetVal;
+      }
+    }
+  }
+
+  VAddr = Addr;
+  Offset = DAG.getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
+  SLC = DAG.getTargetConstant(0, SDLoc(), MVT::i1);
+
+  return true;
+}
+
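The immediate-offset ranges checked in `SelectFlatOffset` above are the core of the GFX10 change: each range shrinks by one bit relative to GFX9. A standalone restatement, assuming only what the code above shows (this is illustrative, not LLVM code):

```cpp
#include <cstdint>

// Pre-GFX10 FLAT offsets: 13-bit signed (global/scratch) or 12-bit unsigned
// (flat segment). GFX10: 12-bit signed or 11-bit unsigned respectively.
static bool fitsFlatOffset(int64_t Off, bool IsSigned, bool IsGFX10) {
  const int Bits = IsGFX10 ? (IsSigned ? 12 : 11) : (IsSigned ? 13 : 12);
  if (IsSigned) // same check as llvm::isInt<Bits>()
    return Off >= -(int64_t(1) << (Bits - 1)) && Off < (int64_t(1) << (Bits - 1));
  return Off >= 0 && Off < (int64_t(1) << Bits); // llvm::isUInt<Bits>()
}
```

Passing the pattern root `SDNode *N` down to the selector is what makes the `hasFlatSegmentOffsetBug()` guard possible: the address space of the underlying memory node decides whether a nonzero offset may be folded at all.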
 // Replace load of an illegal type with a store of a bitcast to a friendlier
 // type.
 SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -356,7 +356,8 @@
            .add(I.getOperand(0))
            .addImm(0) // offset
            .addImm(0) // glc
-           .addImm(0); // slc
+           .addImm(0) // slc
+           .addImm(0); // dlc
 
   // Now that we selected an opcode, we need to constrain the register
@@ -532,7 +533,8 @@
     .addReg(PtrReg)
     .addImm(0) // offset
     .addImm(0) // glc
-    .addImm(0); // slc
+    .addImm(0) // slc
+    .addImm(0); // dlc
 
   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
   I.eraseFromParent();
Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -139,6 +139,7 @@
     ImmTyInstOffset,
     ImmTyOffset0,
     ImmTyOffset1,
+    ImmTyDLC,
     ImmTyGLC,
     ImmTySLC,
     ImmTyTFE,
@@ -314,6 +315,7 @@
   bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isLDS() const { return isImmTy(ImmTyLDS); }
+  bool isDLC() const { return isImmTy(ImmTyDLC); }
   bool isGLC() const { return isImmTy(ImmTyGLC); }
   bool isSLC() const { return isImmTy(ImmTySLC); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
@@ -676,6 +678,7 @@
   case ImmTyInstOffset: OS << "InstOffset"; break;
   case ImmTyOffset0: OS << "Offset0"; break;
   case ImmTyOffset1: OS << "Offset1"; break;
+  case ImmTyDLC: OS << "DLC"; break;
   case ImmTyGLC: OS << "GLC"; break;
   case ImmTySLC: OS << "SLC"; break;
   case ImmTyTFE: OS << "TFE"; break;
@@ -1184,6 +1187,7 @@
   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
 
+  AMDGPUOperand::Ptr defaultDLC() const;
   AMDGPUOperand::Ptr defaultGLC() const;
   AMDGPUOperand::Ptr defaultSLC() const;
@@ -2303,13 +2307,26 @@
     }
   }
 
-  if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) {
+  if (TSFlags & SIInstrFlags::FLAT) {
     // FIXME: Produces error without correct column reported.
-    auto OpNum =
-        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
+    auto Opcode = Inst.getOpcode();
+    auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
 
     const auto &Op = Inst.getOperand(OpNum);
-    if (Op.getImm() != 0)
+    if (!hasFlatOffsets() && Op.getImm() != 0)
       return Match_InvalidOperand;
+
+    // GFX10: Address offset is 12-bit signed byte offset. Must be positive for
+    // FLAT segment. For FLAT segment MSB is ignored and forced to zero.
+    if (isGFX10()) {
+      if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
+        if (!isInt<12>(Op.getImm()))
+          return Match_InvalidOperand;
+      } else {
+        if (!isUInt<11>(Op.getImm()))
+          return Match_InvalidOperand;
+      }
+    }
   }
 
   return Match_Success;
@@ -3887,6 +3904,9 @@
     }
   }
 
+  if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
+    return MatchOperand_ParseFail;
+
   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
@@ -5101,6 +5121,10 @@
 // mubuf
 //===----------------------------------------------------------------------===//
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+}
+
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
 }
@@ -5177,6 +5201,9 @@
   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   }
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
 }
 
 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -5214,6 +5241,9 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
+
+  if (isGFX10())
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
 }
 
 //===----------------------------------------------------------------------===//
@@ -5249,8 +5279,12 @@
     }
   }
 
+  bool IsGFX10 = isGFX10();
+
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
+  if (IsGFX10)
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
@@ -5353,6 +5387,7 @@
   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
+  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
   {"dfmt",    AMDGPUOperand::ImmTyFORMAT, false, nullptr},
   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
@@ -5581,7 +5616,7 @@
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
   }
 
-  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
+  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
   // it has src2 register operand that is tied to dst operand
   // we don't allow modifiers for this operand in assembler so src2_modifiers
   // should be 0.
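The assembler-side rule the hunks above implement is simple: `glc`, `slc`, and (new) `dlc` are optional named modifiers, and `dlc` is only legal on GFX10; on older targets `parseNamedBit` fails the match. A rough sketch of that rule, with hypothetical names (this is not the actual parser code):

```cpp
#include <string_view>

struct CachePolicy { bool GLC = false, SLC = false, DLC = false; };

// Accept one trailing modifier token; "dlc" is rejected before GFX10.
static bool acceptModifier(std::string_view Tok, bool IsGFX10, CachePolicy &CP) {
  if (Tok == "glc") { CP.GLC = true; return true; }
  if (Tok == "slc") { CP.SLC = true; return true; }
  if (Tok == "dlc") {
    if (!IsGFX10) return false;
    CP.DLC = true;
    return true;
  }
  return false;
}
```

The `cvtMubuf`/`cvtMtbuf`/MIMG converters then append the `dlc` immediate last (after `tfe`), matching the operand order the instruction definitions below declare.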
@@ -6031,7 +6066,8 @@
     break;
 
   case SIInstrFlags::VOPC:
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
+    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
+      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
     break;
Index: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
 def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
 
 def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
 def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
 
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
 def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
 def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -96,7 +96,9 @@
   bits<1> has_vdata = 1;
   bits<1> has_vaddr = 1;
   bits<1> has_glc = 1;
+  bits<1> has_dlc = 1;
   bits<1> glc_value = 0; // the value for glc if no such operand
+  bits<1> dlc_value = 0; // the value for dlc if no such operand
   bits<1> has_srsrc = 1;
   bits<1> has_soffset = 1;
   bits<1> has_offset = 1;
@@ -119,6 +121,7 @@
   bits<12> offset;
   bits<1>  glc;
+  bits<1>  dlc;
   bits<7>  format;
   bits<8>  vaddr;
   bits<8>  vdata;
@@ -137,17 +140,17 @@
   RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
   dag InsNoData = !if(!empty(vaddrList),
     (ins                    SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
+         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
     (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
+         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
   );
   dag InsData = !if(!empty(vaddrList),
     (ins vdataClass:$vdata,                    SReg_128:$srsrc,
          SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe),
+         SLC:$slc, TFE:$tfe, DLC:$dlc),
     (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
          SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe)
+         SLC:$slc, TFE:$tfe, DLC:$dlc)
   );
   dag ret = !if(!empty(vdataList), InsNoData, InsData);
 }
@@ -198,7 +201,7 @@
   : MTBUF_Pseudo<opName,
                  (outs vdataClass:$vdata),
                  getMTBUFIns<addrKindCopy>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -213,13 +216,13 @@
   def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(set load_vt:$vdata,
      (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
-                      i1:$glc, i1:$slc, i1:$tfe)))]>,
+                      i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
     MTBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(set load_vt:$vdata,
      (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
-                      i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
+                      i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
     MTBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -244,7 +247,7 @@
   : MTBUF_Pseudo<opName,
                  (outs),
                  getMTBUFIns<addrKindCopy, [vdataClass]>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -259,13 +262,13 @@
   def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(st vdataType:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                         i16:$offset, i8:$format, i1:$glc,
-                                        i1:$slc, i1:$tfe))]>,
+                                        i1:$slc, i1:$tfe, i1:$dlc))]>,
     MTBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(st vdataType:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                         i16:$offset, i8:$format, i1:$glc,
-                                        i1:$slc, i1:$tfe))]>,
+                                        i1:$slc, i1:$tfe, i1:$dlc))]>,
     MTBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -323,7 +326,9 @@
   bits<1> has_vdata = 1;
   bits<1> has_vaddr = 1;
   bits<1> has_glc = 1;
+  bits<1> has_dlc = 1;
   bits<1> glc_value = 0; // the value for glc if no such operand
+  bits<1> dlc_value = 0; // the value for dlc if no such operand
   bits<1> has_srsrc = 1;
   bits<1> has_soffset = 1;
   bits<1> has_offset = 1;
@@ -332,7 +337,7 @@
   bits<4> dwords = 0;
 }
 
-class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
+class MUBUF_Real <MUBUF_Pseudo ps> :
   InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
 
   let isPseudo = 0;
@@ -347,6 +352,7 @@
   bits<12> offset;
   bits<1>  glc;
+  bits<1>  dlc;
   bits<8>  vaddr;
   bits<8>  vdata;
   bits<7>  srsrc;
@@ -357,7 +363,7 @@
 
 // For cache invalidation instructions.
-class MUBUF_Invalidate <string opName, SDPatternOperator node> :
+class MUBUF_Invalidate <string opName, SDPatternOperator node = null_frag> :
   MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {
 
   let AsmMatchConverter = "";
@@ -372,7 +378,9 @@
   let has_vdata = 0;
   let has_vaddr = 0;
   let has_glc = 0;
+  let has_dlc = 0;
   let glc_value = 0;
+  let dlc_value = 0;
   let has_srsrc = 0;
   let has_soffset = 0;
   let has_offset = 0;
@@ -399,7 +407,7 @@
   );
   dag ret = !con(
               !if(!empty(vdataList), InsNoData, InsData),
-              !if(isLds, (ins), (ins TFE:$tfe))
+              !if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
              );
 }
@@ -459,7 +467,7 @@
       !con(getMUBUFIns<addrKindCopy>.ret,
            !if(HasTiedDest, (ins vdataClass:$vdata_in), (ins))),
       " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
-        !if(isLds, " lds", "$tfe"),
+        !if(isLds, " lds", "$tfe") # "$dlc",
       pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -489,7 +497,7 @@
     !if(isLds,
       [],
       [(set load_vt:$vdata,
-       (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+       (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
     MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>;
 
   def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, TiedDest, isLds,
    !if(isLds,
      [],
      [(set load_vt:$vdata,
-      (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))])>,
+      (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))])>,
     MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>;
 
   def _OFFEN  : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, TiedDest, isLds>;
@@ -530,7 +538,7 @@
   : MUBUF_Pseudo<opName,
                  (outs),
                  getMUBUFIns<addrKindCopy, [vdataClass]>.ret,
-                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
                  pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -546,12 +554,12 @@
   def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
     MUBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+                                       i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
     MUBUFAddr64Table<1, NAME>;
 
   def _OFFEN  : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -637,6 +645,7 @@
   let hasSideEffects = 1;
   let DisableWQM = 1;
   let has_glc = 0;
+  let has_dlc = 0;
   let has_tfe = 0;
   let maybeAtomic = 1;
 }
@@ -655,6 +664,7 @@
             AtomicNoRet<opNameWithAddr, 0> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 0;
+  let dlc_value = 0;
   let AsmMatchConverter = "cvtMubufAtomic";
 }
@@ -672,6 +682,7 @@
             AtomicNoRet<opNameWithAddr, 1> {
   let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
   let glc_value = 1;
+  let dlc_value = 0;
   let Constraints = "$vdata = $vdata_in";
   let DisableEncoding = "$vdata_in";
   let AsmMatchConverter = "cvtMubufAtomicReturn";
@@ -1051,6 +1062,11 @@
 
 } // End let SubtargetPredicate = isGFX7Plus
 
+let SubtargetPredicate = isGFX10Plus in {
+  def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">;
+  def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">;
+} // End SubtargetPredicate = isGFX10Plus
+
 //===----------------------------------------------------------------------===//
 // MUBUF Patterns
 //===----------------------------------------------------------------------===//
@@ -1063,6 +1079,10 @@
   return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
 }]>;
 
+def extract_dlc : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // buffer_load/store_format patterns
 //===----------------------------------------------------------------------===//
@@ -1073,21 +1093,21 @@
     (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, imm)),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1096,7 +1116,7 @@
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
      $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
@@ -1144,21 +1164,23 @@
     (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
               imm:$cachepolicy, 0),
     (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
-      (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
               imm:$cachepolicy, imm),
     (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
-      (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1167,8 +1189,8 @@
     (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
      $vdata,
      (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
-      $rsrc, $soffset, (as_i16imm $offset),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
+      (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
@@ -1322,8 +1344,8 @@
 class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
                                PatFrag constant_ld> : GCNPat <
      (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
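The `extract_glc`/`extract_slc`/`extract_dlc` transforms above all slice the same packed `cachepolicy` immediate that the buffer intrinsics carry. Stated as plain C++ (illustrative only; the bit positions are exactly those used by the `SDNodeXForm`s: bit 0 = glc, bit 1 = slc, bit 2 = dlc):

```cpp
constexpr unsigned extractGLC(unsigned CachePolicy) { return CachePolicy & 1; }
constexpr unsigned extractSLC(unsigned CachePolicy) { return (CachePolicy >> 1) & 1; }
constexpr unsigned extractDLC(unsigned CachePolicy) { return (CachePolicy >> 2) & 1; }

static_assert(extractGLC(0b001) == 1 && extractSLC(0b010) == 1 &&
              extractDLC(0b100) == 1, "cachepolicy bit layout");
```

Keeping the policy packed in one immediate lets the GFX10 `dlc` bit ride through existing intrinsic signatures unchanged; pre-GFX10 instruction definitions simply never read bit 2.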
 multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
                                      ValueType vt, PatFrag atomic_ld> {
   def : GCNPat <
      (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                  i16:$offset, i1:$slc))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
   >;
 
   def : GCNPat <
     (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
   >;
 }
@@ -1355,8 +1377,8 @@
 
   def : GCNPat <
     (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                          i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
-    (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                          i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
+    (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 }
@@ -1377,12 +1399,12 @@
   def : GCNPat <
     (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                i32:$soffset, u16imm:$offset))),
-    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
-    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
@@ -1392,12 +1414,12 @@
                                    ValueType vt, PatFrag ld_frag> {
   def : GCNPat <
     (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
-    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
   >;
 
   def : GCNPat <
     (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
-    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
+    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
   >;
 }
@@ -1435,12 +1457,12 @@
   def : GCNPat <
     (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                             i16:$offset, i1:$slc), vt:$val),
-    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
+    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
   >;
 
   def : GCNPat <
     (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
   >;
 }
 let SubtargetPredicate = isGFX6GFX7 in {
@@ -1454,8 +1476,8 @@
 
   def : GCNPat <
     (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
-    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+                                i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
+    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
   >;
 }
@@ -1468,13 +1490,13 @@
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
                                       i32:$soffset, u16imm:$offset)),
-    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 
   def : GCNPat <
     (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
                                        u16imm:$offset)),
-    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+    (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
   >;
 }
@@ -1512,7 +1534,7 @@
               imm:$format, imm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1520,7 +1542,7 @@
               imm:$format, imm:$cachepolicy, imm)),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1528,7 +1550,7 @@
               imm:$format, imm:$cachepolicy, 0)),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1538,7 +1560,7 @@
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
@@ -1570,7 +1592,7 @@
               imm:$format, imm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1578,7 +1600,7 @@
               imm:$format, imm:$cachepolicy, imm),
     (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1586,7 +1608,7 @@
               imm:$format, imm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
       (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 
   def : GCNPat<
@@ -1596,7 +1618,7 @@
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
       $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
-      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
+      (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
   >;
 }
@@ -1626,24 +1648,18 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Base ENC_MUBUF for GFX6, GFX7.
+// Base ENC_MUBUF for GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//
 
-class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
-  Enc64,
-  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate=isGFX6GFX7;
-  let DecoderNamespace="GFX6GFX7";
-
+class Base_MUBUF_Real_gfx6_gfx7_gfx10 <bits<7> op, MUBUF_Pseudo ps, int ef> :
+    MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
   let Inst{11-0}  = !if(ps.has_offset, offset, ?);
   let Inst{12}    = ps.offen;
   let Inst{13}    = ps.idxen;
   let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
-  let Inst{15}    = ps.addr64;
   let Inst{16}    = !if(ps.lds, 1, 0);
   let Inst{24-18} = op;
-  let Inst{31-26} = 0x38; //encoding
+  let Inst{31-26} = 0x38;
   let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
   let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
   let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
@@ -1652,125 +1668,250 @@
   let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
 }
 
-multiclass MUBUF_Real_AllAddr_si <bits<7> op> {
-  def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
-  def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
-  def _OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
-  def _IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
-  def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
-
-multiclass MUBUF_Real_AllAddr_Lds_si <bits<7> op> {
-
-  def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
-                   MUBUFLdsTable<0, NAME # "_OFFSET_si">;
-  def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
-                   MUBUFLdsTable<0, NAME # "_ADDR64_si">;
-  def _OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
-                   MUBUFLdsTable<0, NAME # "_OFFEN_si">;
-  def _IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
-                   MUBUFLdsTable<0, NAME # "_IDXEN_si">;
-  def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
-                   MUBUFLdsTable<0, NAME # "_BOTHEN_si">;
-
-  def _LDS_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
-                       MUBUFLdsTable<1, NAME # "_OFFSET_si">;
-  def _LDS_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
-                       MUBUFLdsTable<1, NAME # "_ADDR64_si">;
-  def _LDS_OFFEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
-                       MUBUFLdsTable<1, NAME # "_OFFEN_si">;
-  def _LDS_IDXEN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
-                       MUBUFLdsTable<1, NAME # "_IDXEN_si">;
-  def _LDS_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
-                       MUBUFLdsTable<1, NAME # "_BOTHEN_si">;
-}
-
-multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> {
-  def _OFFSET_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
-  def _ADDR64_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
-  def _OFFEN_RTN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
-  def _IDXEN_RTN_si  : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
-  def _BOTHEN_RTN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
-}
-
-defm BUFFER_LOAD_FORMAT_X       : MUBUF_Real_AllAddr_Lds_si <0x00>;
-defm BUFFER_LOAD_FORMAT_XY      : MUBUF_Real_AllAddr_si <0x01>;
-defm BUFFER_LOAD_FORMAT_XYZ     : MUBUF_Real_AllAddr_si <0x02>;
-defm BUFFER_LOAD_FORMAT_XYZW    : MUBUF_Real_AllAddr_si <0x03>;
-defm BUFFER_STORE_FORMAT_X      : MUBUF_Real_AllAddr_si <0x04>;
-defm BUFFER_STORE_FORMAT_XY     : MUBUF_Real_AllAddr_si <0x05>;
-defm BUFFER_STORE_FORMAT_XYZ    : MUBUF_Real_AllAddr_si <0x06>;
-defm BUFFER_STORE_FORMAT_XYZW   : MUBUF_Real_AllAddr_si <0x07>;
-defm BUFFER_LOAD_UBYTE          : MUBUF_Real_AllAddr_Lds_si <0x08>;
-defm BUFFER_LOAD_SBYTE          : MUBUF_Real_AllAddr_Lds_si <0x09>;
-defm BUFFER_LOAD_USHORT         : MUBUF_Real_AllAddr_Lds_si <0x0a>;
-defm BUFFER_LOAD_SSHORT         : MUBUF_Real_AllAddr_Lds_si <0x0b>;
-defm BUFFER_LOAD_DWORD          : MUBUF_Real_AllAddr_Lds_si <0x0c>;
-defm BUFFER_LOAD_DWORDX2        : MUBUF_Real_AllAddr_si <0x0d>;
-defm BUFFER_LOAD_DWORDX4        : MUBUF_Real_AllAddr_si <0x0e>;
-defm BUFFER_LOAD_DWORDX3        : MUBUF_Real_AllAddr_si <0x0f>;
-defm BUFFER_STORE_BYTE          : MUBUF_Real_AllAddr_si <0x18>;
-defm BUFFER_STORE_SHORT         : MUBUF_Real_AllAddr_si <0x1a>;
-defm BUFFER_STORE_DWORD         : MUBUF_Real_AllAddr_si <0x1c>;
-defm BUFFER_STORE_DWORDX2       : MUBUF_Real_AllAddr_si <0x1d>;
-defm BUFFER_STORE_DWORDX4       : MUBUF_Real_AllAddr_si <0x1e>;
-defm BUFFER_STORE_DWORDX3       : MUBUF_Real_AllAddr_si <0x1f>;
-
-defm BUFFER_ATOMIC_SWAP         : MUBUF_Real_Atomic_si <0x30>;
-defm BUFFER_ATOMIC_CMPSWAP      : MUBUF_Real_Atomic_si <0x31>;
-defm BUFFER_ATOMIC_ADD          : MUBUF_Real_Atomic_si <0x32>;
-defm BUFFER_ATOMIC_SUB          : MUBUF_Real_Atomic_si <0x33>;
-//defm BUFFER_ATOMIC_RSUB       : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN         : MUBUF_Real_Atomic_si <0x35>;
-defm BUFFER_ATOMIC_UMIN         : MUBUF_Real_Atomic_si <0x36>;
-defm BUFFER_ATOMIC_SMAX         : MUBUF_Real_Atomic_si <0x37>;
-defm BUFFER_ATOMIC_UMAX         : MUBUF_Real_Atomic_si <0x38>;
-defm BUFFER_ATOMIC_AND          : MUBUF_Real_Atomic_si <0x39>;
-defm BUFFER_ATOMIC_OR           : MUBUF_Real_Atomic_si <0x3a>;
-defm BUFFER_ATOMIC_XOR          : MUBUF_Real_Atomic_si <0x3b>;
-defm BUFFER_ATOMIC_INC          : MUBUF_Real_Atomic_si <0x3c>;
-defm BUFFER_ATOMIC_DEC          : MUBUF_Real_Atomic_si <0x3d>;
-
-//defm BUFFER_ATOMIC_FCMPSWAP   : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI
-//defm BUFFER_ATOMIC_FMIN       : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI
-//defm BUFFER_ATOMIC_FMAX       : MUBUF_Real_Atomic_si <0x40>; // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2      : MUBUF_Real_Atomic_si <0x50>;
-defm BUFFER_ATOMIC_CMPSWAP_X2   : MUBUF_Real_Atomic_si <0x51>;
-defm BUFFER_ATOMIC_ADD_X2       : MUBUF_Real_Atomic_si <0x52>;
-defm BUFFER_ATOMIC_SUB_X2       : MUBUF_Real_Atomic_si <0x53>;
-//defm BUFFER_ATOMIC_RSUB_X2    : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2      : MUBUF_Real_Atomic_si <0x55>;
-defm BUFFER_ATOMIC_UMIN_X2      : MUBUF_Real_Atomic_si <0x56>;
-defm BUFFER_ATOMIC_SMAX_X2      : MUBUF_Real_Atomic_si <0x57>;
-defm BUFFER_ATOMIC_UMAX_X2      : MUBUF_Real_Atomic_si <0x58>;
-defm BUFFER_ATOMIC_AND_X2       : MUBUF_Real_Atomic_si <0x59>;
-defm BUFFER_ATOMIC_OR_X2        : MUBUF_Real_Atomic_si <0x5a>;
-defm BUFFER_ATOMIC_XOR_X2       : MUBUF_Real_Atomic_si <0x5b>;
-defm BUFFER_ATOMIC_INC_X2       : MUBUF_Real_Atomic_si <0x5c>;
-defm BUFFER_ATOMIC_DEC_X2       : MUBUF_Real_Atomic_si <0x5d>;
-// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
-//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e">; // isn't on VI
-//defm BUFFER_ATOMIC_FMIN_X2    : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI
-//defm BUFFER_ATOMIC_FMAX_X2    : MUBUF_Real_Atomic_si <0x60>; // isn't on VI
+class MUBUF_Real_gfx10 <bits<8> op, MUBUF_Pseudo ps> :
+    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
+  let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
+  let Inst{25} = op{7};
+}
 
-def BUFFER_WBINVL1_SC_si        : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
-def BUFFER_WBINVL1_si           : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
+class MUBUF_Real_gfx6_gfx7 <bits<8> op, MUBUF_Pseudo ps> :
+    Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.SI> {
+  let Inst{15} = ps.addr64;
+}
 
-class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
-  MTBUF_Real<ps>,
-  Enc64,
-  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
-  let AssemblerPredicate=isGFX6GFX7;
-  let DecoderNamespace="GFX6GFX7";
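Before the GFX10 section below, a sketch of what the shared `Base_MUBUF_Real_gfx6_gfx7_gfx10` layout plus the GFX10 subclass amount to. This is an illustrative C++ restatement of the bit assignments above (subset only; `lds`, `srsrc`, `vdata`, etc. omitted), not emitter code:

```cpp
#include <cstdint>

// Pack the GFX10 MUBUF fields named in the TableGen above into Inst{63-0}.
// Note Inst{15} carries dlc on GFX10 but addr64 on GFX6/GFX7, and the 8-bit
// GFX10 opcode is split as op{6-0} -> Inst{24-18}, op{7} -> Inst{25}.
static uint64_t encodeMUBUF_gfx10(unsigned op8, unsigned offset12, bool offen,
                                  bool idxen, bool glc, bool dlc) {
  uint64_t I = 0;
  I |= uint64_t(offset12 & 0xfff);         // Inst{11-0}  offset
  I |= uint64_t(offen) << 12;              // Inst{12}
  I |= uint64_t(idxen) << 13;              // Inst{13}
  I |= uint64_t(glc) << 14;                // Inst{14}
  I |= uint64_t(dlc) << 15;                // Inst{15}    (dlc on GFX10)
  I |= uint64_t(op8 & 0x7f) << 18;         // Inst{24-18} op{6-0}
  I |= uint64_t((op8 >> 7) & 1) << 25;     // Inst{25}    op{7}
  I |= uint64_t(0x38) << 26;               // Inst{31-26} MUBUF encoding
  return I;
}
```

Splitting the base class by encoding family (`int ef`) is what lets one pseudo produce GFX6/GFX7 and GFX10 reals that differ only in bit 15 and the opcode width.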
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass MUBUF_Real_gfx10_with_name<bits<8> op, string opName,
+                                        string asmName> {
+    def _gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(opName)> {
+      MUBUF_Pseudo ps = !cast<MUBUF_Pseudo>(opName);
+      let AsmString = asmName # ps.AsmOperands;
+    }
+  }
+  multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> {
+    def _BOTHEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+  multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> {
+    def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+                        MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">;
+    def _OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+                        MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">;
+    def _IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+                        MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">;
+    def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+                        MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">;
+
+    def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+                            MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">;
+    def _LDS_OFFEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+                            MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">;
+    def _LDS_IDXEN_gfx10  : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+                            MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">;
+    def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+                            MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">;
+  }
+  multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
+      MUBUF_Real_AllAddr_gfx10<op> {
+    def _BOTHEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
+    def _IDXEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
+    def _OFFEN_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
+    def _OFFSET_RTN_gfx10 :
+      MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+
+defm BUFFER_STORE_BYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x019>;
+defm BUFFER_STORE_SHORT_D16_HI    : MUBUF_Real_AllAddr_gfx10<0x01b>;
+defm BUFFER_LOAD_UBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x020>;
+defm BUFFER_LOAD_UBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x021>;
+defm BUFFER_LOAD_SBYTE_D16        : MUBUF_Real_AllAddr_gfx10<0x022>;
+defm BUFFER_LOAD_SBYTE_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x023>;
+defm BUFFER_LOAD_SHORT_D16        : MUBUF_Real_AllAddr_gfx10<0x024>;
+defm BUFFER_LOAD_SHORT_D16_HI     : MUBUF_Real_AllAddr_gfx10<0x025>;
+// FIXME-GFX10: Add following instructions:
+//defm BUFFER_LOAD_FORMAT_D16_HI_X  : MUBUF_Real_AllAddr_gfx10<0x026>;
+//defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx10<0x027>;
+defm BUFFER_LOAD_FORMAT_D16_X     : MUBUF_Real_AllAddr_gfx10<0x080>;
+defm BUFFER_LOAD_FORMAT_D16_XY    : MUBUF_Real_AllAddr_gfx10<0x081>;
+defm BUFFER_LOAD_FORMAT_D16_XYZ   : MUBUF_Real_AllAddr_gfx10<0x082>;
+defm BUFFER_LOAD_FORMAT_D16_XYZW  : MUBUF_Real_AllAddr_gfx10<0x083>;
+defm BUFFER_STORE_FORMAT_D16_X    : MUBUF_Real_AllAddr_gfx10<0x084>;
+defm BUFFER_STORE_FORMAT_D16_XY   : MUBUF_Real_AllAddr_gfx10<0x085>;
+defm BUFFER_STORE_FORMAT_D16_XYZ  : MUBUF_Real_AllAddr_gfx10<0x086>;
+defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx10<0x087>;
+
+def BUFFER_GL0_INV_gfx10 :
+  MUBUF_Real_gfx10<0x071, BUFFER_GL0_INV>;
+def BUFFER_GL1_INV_gfx10 :
+  MUBUF_Real_gfx10<0x072, BUFFER_GL1_INV>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF - GFX6, GFX7, GFX10.
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6" in {
+  multiclass MUBUF_Real_gfx6<bits<8> op> {
+    def _gfx6 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+  }
+} // End AssemblerPredicate = isGFX6, DecoderNamespace = "GFX6"
+
+let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
+  multiclass MUBUF_Real_gfx7<bits<8> op> {
+    def _gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME)>;
+  }
+} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
+
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass MUBUF_Real_AllAddr_gfx6_gfx7<bits<8> op> {
+    def _ADDR64_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+    def _BOTHEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx6_gfx7 :
+      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+  multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> {
+    def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
+                            MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">;
+    def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
+                            MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">;
+    def _OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
+                            MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">;
+    def _IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
+                            MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">;
+    def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
+                            MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">;
+
+    def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>,
+                                MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">;
+    def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>,
+                                MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">;
+    def _LDS_OFFEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>,
+                                MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">;
+    def _LDS_IDXEN_gfx6_gfx7  : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>,
+                                MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">;
+    def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
+                                MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
  }
  multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
      MUBUF_Real_AllAddr_gfx6_gfx7<op> {
    def _ADDR64_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
    def _BOTHEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
    def _IDXEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
    def _OFFEN_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
    def _OFFSET_RTN_gfx6_gfx7 :
      MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_AllAddr_gfx6_gfx7<op>, MUBUF_Real_AllAddr_gfx10<op>;

multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_AllAddr_Lds_gfx6_gfx7<op>, MUBUF_Real_AllAddr_Lds_gfx10<op>;

multiclass MUBUF_Real_Atomics_gfx6_gfx7_gfx10<bits<8> op> :
  MUBUF_Real_Atomics_gfx6_gfx7<op>, MUBUF_Real_Atomics_gfx10<op>;

// FIXME-GFX6: Following instructions are available only on GFX6.
//defm BUFFER_ATOMIC_RSUB         : MUBUF_Real_Atomics_gfx6 <0x034>;
//defm BUFFER_ATOMIC_RSUB_X2      : MUBUF_Real_Atomics_gfx6 <0x054>;

defm BUFFER_LOAD_FORMAT_X     : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x000>;
defm BUFFER_LOAD_FORMAT_XY    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
defm BUFFER_LOAD_FORMAT_XYZ   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
defm BUFFER_LOAD_FORMAT_XYZW  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
defm BUFFER_STORE_FORMAT_X    : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
defm BUFFER_STORE_FORMAT_XY   : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
defm BUFFER_STORE_FORMAT_XYZ  : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
defm BUFFER_LOAD_UBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x008>;
defm BUFFER_LOAD_SBYTE        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x009>;
defm BUFFER_LOAD_USHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00a>;
defm BUFFER_LOAD_SSHORT       : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00b>;
defm BUFFER_LOAD_DWORD        : MUBUF_Real_AllAddr_Lds_gfx6_gfx7_gfx10<0x00c>;
defm BUFFER_LOAD_DWORDX2      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00d>;
defm BUFFER_LOAD_DWORDX4      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00e>;
defm BUFFER_LOAD_DWORDX3      : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x00f>;
defm BUFFER_STORE_BYTE        : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x018>;
defm BUFFER_STORE_SHORT       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01a>;
defm BUFFER_STORE_DWORD       : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01c>;
defm BUFFER_STORE_DWORDX2     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01d>;
defm BUFFER_STORE_DWORDX4     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01e>;
defm BUFFER_STORE_DWORDX3     : MUBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x01f>;

defm BUFFER_ATOMIC_SWAP       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x030>;
defm BUFFER_ATOMIC_CMPSWAP    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x031>;
defm BUFFER_ATOMIC_ADD        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x032>;
defm BUFFER_ATOMIC_SUB        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x033>;
defm BUFFER_ATOMIC_SMIN       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x035>;
defm BUFFER_ATOMIC_UMIN       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x036>;
defm BUFFER_ATOMIC_SMAX       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x037>;
defm BUFFER_ATOMIC_UMAX       : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x038>;
defm BUFFER_ATOMIC_AND        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x039>;
defm BUFFER_ATOMIC_OR         : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03a>;
defm BUFFER_ATOMIC_XOR        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03b>;
defm BUFFER_ATOMIC_INC        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03c>;
defm BUFFER_ATOMIC_DEC        : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03d>;
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03e>;
//defm BUFFER_ATOMIC_FMIN     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x03f>;
//defm BUFFER_ATOMIC_FMAX     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x040>;
defm BUFFER_ATOMIC_SWAP_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x050>;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x051>;
defm BUFFER_ATOMIC_ADD_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x052>;
defm BUFFER_ATOMIC_SUB_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x053>;
defm BUFFER_ATOMIC_SMIN_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x055>;
defm BUFFER_ATOMIC_UMIN_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x056>;
defm BUFFER_ATOMIC_SMAX_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x057>;
defm BUFFER_ATOMIC_UMAX_X2    : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x058>;
defm BUFFER_ATOMIC_AND_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x059>;
defm BUFFER_ATOMIC_OR_X2      : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05a>;
defm BUFFER_ATOMIC_XOR_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05b>;
defm BUFFER_ATOMIC_INC_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05c>;
defm BUFFER_ATOMIC_DEC_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05d>;
// FIXME-GFX7: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on GFX7.
// FIXME-GFX6-GFX7-GFX10: Add following instructions:
//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
//defm BUFFER_ATOMIC_FMIN_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
//defm BUFFER_ATOMIC_FMAX_X2     : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;

defm BUFFER_WBINVL1_SC       : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL      : MUBUF_Real_gfx7<0x070>;
def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>;

//===----------------------------------------------------------------------===//
// Base ENC_MTBUF for GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

class Base_MTBUF_Real_gfx6_gfx7_gfx10 <bits<3> op, MTBUF_Pseudo ps, int ef> :
    MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> {
  let Inst{11-0}  = !if(ps.has_offset, offset, ?);
  let Inst{12}    = ps.offen;
  let Inst{13}    = ps.idxen;
  let Inst{14}    = !if(ps.has_glc, glc, ps.glc_value);
  let Inst{18-16} = op;
  let Inst{31-26} = 0x3a; //encoding
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
  let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
@@ -1780,43 +1921,83 @@
  let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}

-multiclass MTBUF_Real_AllAddr_si <bits<3> op> {
-  def _OFFSET_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
-  def _ADDR64_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
-  def _OFFEN_si  : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
-  def _IDXEN_si  : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
-  def _BOTHEN_si : MTBUF_Real_si <op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
-}
+//===----------------------------------------------------------------------===//
+// MTBUF - GFX10.
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Real_gfx10 <bits<4> op, MTBUF_Pseudo ps> :
+    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
+  let Inst{15}    = !if(ps.has_dlc, dlc, ps.dlc_value);
+  let Inst{25-19} = format;
+  let Inst{53}    = op{3};
+}
+
+let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+  multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> {
+    def _BOTHEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx10 :
+      MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

-defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_si <0>;
-defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_si <1>;
-defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_si <2>;
-defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_si <3>;
-defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_si <4>;
-defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_si <5>;
-defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_si <6>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_si <7>;
+defm TBUFFER_LOAD_FORMAT_D16_X     : MTBUF_Real_AllAddr_gfx10<0x008>;
+defm TBUFFER_LOAD_FORMAT_D16_XY    : MTBUF_Real_AllAddr_gfx10<0x009>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZ   : MTBUF_Real_AllAddr_gfx10<0x00a>;
+defm TBUFFER_LOAD_FORMAT_D16_XYZW  : MTBUF_Real_AllAddr_gfx10<0x00b>;
+defm TBUFFER_STORE_FORMAT_D16_X    : MTBUF_Real_AllAddr_gfx10<0x00c>;
+defm TBUFFER_STORE_FORMAT_D16_XY   : MTBUF_Real_AllAddr_gfx10<0x00d>;
+defm TBUFFER_STORE_FORMAT_D16_XYZ  : MTBUF_Real_AllAddr_gfx10<0x00e>;
+defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx10<0x00f>;

 //===----------------------------------------------------------------------===//
-// CI
-// MTBUF - GFX6, GFX7.
+// MTBUF - GFX6, GFX7, GFX10.
 //===----------------------------------------------------------------------===//

-class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real_si<op, ps> {
-  let AssemblerPredicate = isGFX7Only;
-  let DecoderNamespace = "GFX7";
+class MTBUF_Real_gfx6_gfx7 <bits<4> op, MTBUF_Pseudo ps> :
+    Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.SI> {
+  let Inst{15}    = ps.addr64;
+  let Inst{22-19} = dfmt;
+  let Inst{25-23} = nfmt;
 }

-def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
+let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
+  multiclass MTBUF_Real_AllAddr_gfx6_gfx7<bits<4> op> {
+    def _ADDR64_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_ADDR64")>;
+    def _BOTHEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>;
+    def _IDXEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN")>;
+    def _OFFEN_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN")>;
+    def _OFFSET_gfx6_gfx7 :
+      MTBUF_Real_gfx6_gfx7<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>;
+  }
+} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
+
+multiclass MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<bits<4> op> :
+  MTBUF_Real_AllAddr_gfx6_gfx7<op>, MTBUF_Real_AllAddr_gfx10<op>;
+
+defm TBUFFER_LOAD_FORMAT_X     : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x000>;
+defm TBUFFER_LOAD_FORMAT_XY    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x001>;
+defm TBUFFER_LOAD_FORMAT_XYZ   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x002>;
+defm TBUFFER_LOAD_FORMAT_XYZW  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x003>;
+defm TBUFFER_STORE_FORMAT_X    : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x004>;
+defm TBUFFER_STORE_FORMAT_XY   : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x005>;
+defm TBUFFER_STORE_FORMAT_XYZ  : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x006>;
+defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;

 //===----------------------------------------------------------------------===//
-// GFX8, GFX9 (VI). 
+// GFX8, GFX9 (VI).
 //===----------------------------------------------------------------------===//

 class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
+  MUBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
   let AssemblerPredicate = isGFX8GFX9;
@@ -1866,7 +2047,7 @@
 }

 class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
-  MUBUF_Real<op, ps>,
+  MUBUF_Real<ps>,
   Enc64,
   SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX80> {
   let AssemblerPredicate=HasUnpackedD16VMem;
Index: llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/FLATInstructions.td
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//

-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
-def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>;
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

-def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>;
-def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>;
+def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;

 //===----------------------------------------------------------------------===//
 // FLAT classes
@@ -51,6 +51,8 @@
   bits<1> has_data = 1;
   bits<1> has_glc  = 1;
   bits<1> glcValue = 0;
+  bits<1> has_dlc  = 1;
+  bits<1> dlcValue = 0;

   let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
     !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));
@@ -88,6 +90,7 @@
   bits<1> slc;
   bits<1> glc;
+  bits<1> dlc;

   // Only valid on gfx9
   bits<1> lds = 0; // XXX - What does this actually do?
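The `has_dlc`/`dlc_value` (MUBUF/MTBUF) and `has_dlc`/`dlcValue` (FLAT) flag pairs introduced above follow one convention: when a pseudo has no `dlc` operand, the real encoding substitutes a fixed value, mirroring `!if(ps.has_dlc, dlc, ps.dlc_value)` in the Real classes. An illustrative C++ restatement (struct and function names are hypothetical):

```cpp
struct PseudoFlags {
  bool has_dlc = true;   // does the pseudo carry a dlc operand?
  bool dlc_value = false; // encoded value when the operand is absent
};

// What the "!if(ps.has_dlc, dlc, ps.dlc_value)" encoding fragment computes.
static unsigned encodedDLC(const PseudoFlags &PS, bool DLCOperand) {
  return PS.has_dlc ? DLCOperand : PS.dlc_value;
}
```

Atomics keep `dlc_value = 0` for both the no-return and `_RTN` forms (only `glc` distinguishes them), and the cache-invalidate pseudos set `has_dlc = 0` since they take no memory operands at all.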
@@ -141,9 +144,9 @@
       !con((ins VReg_64:$vaddr),
         !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
         (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
-      (ins GLC:$glc, SLC:$slc)),
+      (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
       !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
-  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = HasSaddr;
@@ -164,8 +167,8 @@
       !con((ins VReg_64:$vaddr, vdataClass:$vdata),
         !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
         (ins !if(HasSignedOffset,offset_s13,offset_u12):$offset)),
-      (ins GLC:$glc, SLC:$slc)),
-  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
+      (ins GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -198,9 +201,9 @@
   opName,
   (outs regClass:$vdst),
   !if(EnableSaddr,
-      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
-      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
-  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc"> {
+      (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+      (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
   let has_data = 0;
   let mayLoad = 1;
   let has_saddr = 1;
@@ -214,9 +217,9 @@
   opName,
   (outs),
   !if(EnableSaddr,
-    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc),
-    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc)),
-  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
+    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
+    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
+  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
   let mayLoad  = 0;
   let mayStore = 1;
   let has_vdst = 0;
@@ -248,6 +251,8 @@
     let mayStore = 1;
     let has_glc  = 0;
    let glcValue = 0;
+    let has_dlc  = 0;
+    let dlcValue = 0;
    let has_vdst = 0;
    let maybeAtomic = 1;
  }
@@ -258,6 +263,7 @@
    let hasPostISelHook = 1;
    let has_vdst = 1;
    let glcValue = 1;
+    let dlcValue = 0;
    let PseudoInstr = NAME # "_RTN";
  }
@@ -492,8 +498,8 @@
 defm FLAT_ATOMIC_DEC_X2    : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                VReg_64, i64, atomic_dec_flat>;

-// GFX7-only flat instructions.
-let SubtargetPredicate = isGFX7Only in {
+// GFX7-, GFX10-only flat instructions.
+let SubtargetPredicate = isGFX7GFX10 in {

 defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                 VGPR_32, f32, null_frag, v2f32, VReg_64>;
@@ -513,7 +519,7 @@
 defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                 VReg_64, f64>;

-} // End SubtargetPredicate = isGFX7Only
+} // End SubtargetPredicate = isGFX7GFX10

 let SubtargetPredicate = HasFlatGlobalInsts in {
 defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
@@ -656,6 +662,22 @@

 } // End SubtargetPredicate = HasFlatScratchInsts

+let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
+  defm GLOBAL_ATOMIC_FCMPSWAP :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FMIN :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FMAX :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+  defm GLOBAL_ATOMIC_FMIN_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+  defm GLOBAL_ATOMIC_FMAX_X2 :
+    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
+
+
 //===----------------------------------------------------------------------===//
 // Flat Patterns
 //===----------------------------------------------------------------------===//

 // Patterns for global loads with no offset.
 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;

 class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
-  (inst $vaddr, $offset, 0, $slc, $in)
+  (inst $vaddr, $offset, 0, 0, $slc, $in)
 >;

 class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in),
-  (inst $vaddr, $offset, 0, $slc, $in)
+  (inst $vaddr, $offset, 0, 0, $slc, $in)
 >;

 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;

 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 0, $slc)
+  (inst $vaddr, $offset, 0, 0, $slc)
 >;

 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;

 class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;

 class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;

 class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 0, $slc)
+  (inst $vaddr, $data, $offset, 0, 0, $slc)
 >;

 class FlatAtomicPat ;

 defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
 defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
+
+
+//===----------------------------------------------------------------------===//
+// GFX10.
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_gfx10 <bits<7> op, FLAT_Pseudo ps> :
+    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
+  let AssemblerPredicate = isGFX10Plus;
+  let DecoderNamespace = "GFX10";
+
+  let Inst{11-0}  = {offset{12}, offset{10-0}};
+  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
+  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
+  let Inst{55}    = 0;
+}
+
+
+multiclass FLAT_Real_Base_gfx10<bits<7> op> {
+  def _gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
+}
+
+multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
+  def _RTN_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+}
+
+multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
+  def _SADDR_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+}
+
+multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
+  def _SADDR_RTN_gfx10 :
+    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+}
+
+
+multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
+  FLAT_Real_Base_gfx10<op>,
+  FLAT_Real_SADDR_gfx10<op>;
+
+multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
+  FLAT_Real_Base_gfx10<op>,
+  FLAT_Real_RTN_gfx10<op>;
+
+multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
+  FLAT_Real_AllAddr_gfx10<op>,
+  FLAT_Real_RTN_gfx10<op>,
+  FLAT_Real_SADDR_RTN_gfx10<op>;
+
+
+// ENC_FLAT.
+defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
+defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
+defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
+defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
+defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
+defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
+defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
+defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
+defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
+defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
+defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
+defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
+defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
+defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
+defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
+defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
+defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
+defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
+defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
+defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
+defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
+defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
+defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
+defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
+defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
+defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
+defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
+defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
+defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
+defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
+defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
+defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
+defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
+defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
+defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
+defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
+defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
+defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
+defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
+defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
+defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
+defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
+defm
FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; +defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; +defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; +defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; +defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; +defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; +defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; +defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; +defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; +defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; +defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; +defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; + + +// ENC_FLAT_GLBL. +defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; +defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; +defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; +defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; +defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; +defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; +defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; +defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; +defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; +defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; +defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; +defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; +defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; +defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; +defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; +defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; +defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; +defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; +defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; +defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; +defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; +defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; +defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; +defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; +defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; +defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; +defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; +defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; +defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; +defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; +defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; +defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; +defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; +defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; +defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; +defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; +defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; +defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; +defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; +defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; +defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; +defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; +defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; +defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; +defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; +defm 
GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; +defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; +defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; +defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; +defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; +defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; +defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; +defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>; +defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>; + + +// ENC_FLAT_SCRATCH. +defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; +defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; +defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; +defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; +defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; +defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; +defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; +defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; +defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; +defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; +defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; +defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; +defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; +defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; +defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; +defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; +defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; +defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; +defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; +defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; +defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; +defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -41,7 +41,8 @@ void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O, @@ -67,6 +68,8 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, Index: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -72,8 +72,14 
@@ } void AMDGPUInstPrinter::printS13ImmDecOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { - O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); + // GFX10: Address offset is 12-bit signed byte offset. + if (AMDGPU::isGFX10(STI)) { + O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm())); + } else { + O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm())); + } } void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, @@ -128,7 +134,7 @@ uint16_t Imm = MI->getOperand(OpNo).getImm(); if (Imm != 0) { O << ((OpNo == 0)? "offset:" : " offset:"); - printS13ImmDecOperand(MI, OpNo, O); + printS13ImmDecOperand(MI, OpNo, STI, O); } } @@ -173,6 +179,12 @@ printNamedBit(MI, OpNo, O, "gds"); } +void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { + if (AMDGPU::isGFX10(STI)) + printNamedBit(MI, OpNo, O, "dlc"); +} + void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { printNamedBit(MI, OpNo, O, "glc"); Index: llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -197,6 +197,11 @@ // Atomics dont have a GLC, so omit the field if not there. if (Glc) NewGlob->addOperand(MF, *Glc); + + MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc); + if (DLC) + NewGlob->addOperand(MF, *DLC); + NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc)); // _D16 have an vdst_in operand, copy it in. MachineOperand *VDstInOp = TII->getNamedOperand(MI, Index: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -70,6 +70,24 @@ // Do a 64-bit pointer add. if (ST.flatScratchIsPointer()) { + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo) + .addReg(FlatScrInitLo) + .addReg(ScratchWaveOffsetReg); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi) + .addReg(FlatScrInitHi) + .addImm(0); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). + addReg(FlatScrInitLo). + addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO | + (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)). + addReg(FlatScrInitHi). + addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI | + (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_))); + return; + } + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO) .addReg(FlatScrInitLo) .addReg(ScratchWaveOffsetReg); @@ -80,6 +98,8 @@ return; } + assert(ST.getGeneration() < AMDGPUSubtarget::GFX10); + // Copy the size in bytes. 
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO) .addReg(FlatScrInitHi, RegState::Kill); @@ -423,6 +443,7 @@ .addReg(Rsrc01) .addImm(EncodedOffset) // offset .addImm(0) // glc + .addImm(0) // dlc .addReg(ScratchRsrcReg, RegState::ImplicitDefine) .addMemOperand(MMO); return; @@ -463,6 +484,7 @@ .addReg(MFI->getImplicitBufferPtrUserSGPR()) .addImm(0) // offset .addImm(0) // glc + .addImm(0) // dlc .addMemOperand(MMO) .addReg(ScratchRsrcReg, RegState::ImplicitDefine); } Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4166,6 +4166,10 @@ getNamedOperand(MI, AMDGPU::OpName::glc)) { MIB.addImm(GLC->getImm()); } + if (const MachineOperand *DLC = + getNamedOperand(MI, AMDGPU::OpName::dlc)) { + MIB.addImm(DLC->getImm()); + } MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -830,6 +830,7 @@ def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>; def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>; +def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>; def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; Index: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -131,6 +131,8 @@ bool GLC1; bool SLC0; bool SLC1; + bool DLC0; + bool DLC1; bool UseST64; SmallVector InstsToMove; }; @@ -323,7 +325,7 @@ if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) { return (EltOffset0 + CI.Width0 == EltOffset1 || EltOffset1 + CI.Width1 == EltOffset0) && - CI.GLC0 == CI.GLC1 && + CI.GLC0 == CI.GLC1 && CI.DLC0 == CI.DLC1 && (CI.InstClass == S_BUFFER_LOAD_IMM || CI.SLC0 == CI.SLC1); } @@ -637,6 +639,8 @@ CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm(); CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm(); } + CI.DLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::dlc)->getImm(); + CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm(); } // Check both offsets fit in the reduced range. 
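The SILoadStoreOptimizer change can be restated in one self-contained sketch (a hypothetical helper, not a function in the file; the real check is the return expression in the first SILoadStoreOptimizer.cpp hunk): two accesses are paired only when their cache-policy bits agree, with DLC now treated exactly like GLC, while SLC remains exempt for S_BUFFER_LOAD_IMM.

  // Hypothetical condensed form of the pairing condition shown above.
  static bool cachePolicyCompatible(bool GLC0, bool GLC1, bool SLC0, bool SLC1,
                                    bool DLC0, bool DLC1, bool IsSBufferLoadImm) {
    return GLC0 == GLC1 && DLC0 == DLC1 && (IsSBufferLoadImm || SLC0 == SLC1);
  }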
@@ -857,6 +861,7 @@ .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase)) .addImm(MergedOffset) // offset .addImm(CI.GLC0) // glc + .addImm(CI.DLC0) // dlc .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); std::pair SubRegIdx = getSubRegIdxs(CI); @@ -909,6 +914,7 @@ .addImm(CI.GLC0) // glc .addImm(CI.SLC0) // slc .addImm(0) // tfe + .addImm(CI.DLC0) // dlc .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); std::pair SubRegIdx = getSubRegIdxs(CI); @@ -1088,9 +1094,10 @@ MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(std::min(CI.Offset0, CI.Offset1)) // offset - .addImm(CI.GLC0) // glc - .addImm(CI.SLC0) // slc - .addImm(0) // tfe + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc .cloneMergedMemRefs({&*CI.I, &*CI.Paired}); moveInstsAfter(MIB, CI.InstsToMove); Index: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -536,6 +536,7 @@ .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe + .addImm(0) // dlc .cloneMemRefs(*MI); const MachineOperand *VDataIn = TII->getNamedOperand(*MI, @@ -639,6 +640,7 @@ .addImm(0) // glc .addImm(0) // slc .addImm(0) // tfe + .addImm(0) // dlc .addMemOperand(NewMMO); if (NumSubRegs > 1) @@ -769,6 +771,7 @@ .addReg(MFI->getScratchRSrcReg()) // sbase .addReg(OffsetReg, RegState::Kill) // soff .addImm(0) // glc + .addImm(0) // dlc .addMemOperand(MMO); continue; @@ -928,9 +931,10 @@ auto MIB = BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg) - .addReg(MFI->getScratchRSrcReg()) // sbase - .addReg(OffsetReg, RegState::Kill) // soff - .addImm(0) // glc + .addReg(MFI->getScratchRSrcReg()) // sbase + .addReg(OffsetReg, RegState::Kill) // soff + .addImm(0) // glc + .addImm(0) // dlc .addMemOperand(MMO); if (NumSubRegs > 1 && i == 0) Index: llvm/trunk/lib/Target/AMDGPU/SMInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SMInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SMInstructions.td @@ -40,6 +40,7 @@ bits<1> has_sbase = 1; bits<1> has_sdst = 1; bit has_glc = 0; + bit has_dlc = 0; bits<1> has_offset = 1; bits<1> offset_is_imm = 0; } @@ -79,6 +80,7 @@ let mayLoad = 1; let mayStore = 0; let has_glc = 1; + let has_dlc = 1; } class SM_Store_Pseudo pattern = []> @@ -88,6 +90,7 @@ let mayLoad = 0; let mayStore = 1; let has_glc = 1; + let has_dlc = 1; let ScalarStore = 1; } @@ -108,21 +111,23 @@ RegisterClass dstClass> { def _IMM : SM_Load_Pseudo { + (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc), + " $sdst, $sbase, $offset$glc$dlc", []> { let offset_is_imm = 1; let BaseClass = baseClass; let PseudoInstr = opName # "_IMM"; let has_glc = 1; + let has_dlc = 1; } def _SGPR : SM_Load_Pseudo { + (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc), + " $sdst, $sbase, $offset$glc$dlc", []> { let BaseClass = baseClass; let PseudoInstr = opName # "_SGPR"; let has_glc = 1; + let has_dlc = 1; } } @@ -130,8 +135,8 @@ RegisterClass baseClass, RegisterClass srcClass> { def _IMM : SM_Store_Pseudo { + (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc), + " $sdata, $sbase, $offset$glc$dlc", []> { let offset_is_imm = 1; let BaseClass = baseClass; let SrcClass = srcClass; @@ -139,8 +144,8 @@ } def _SGPR : SM_Store_Pseudo { + (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, 
i1imm:$glc, i1imm:$dlc), + " $sdata, $sbase, $offset$glc$dlc", []> { let BaseClass = baseClass; let SrcClass = srcClass; let PseudoInstr = opName # "_SGPR"; @@ -184,6 +189,16 @@ def _SGPR : SM_Probe_Pseudo ; } +class SM_WaveId_Pseudo : SM_Pseudo< + opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins), + " $sdst", [(set i32:$sdst, (node))]> { + let hasSideEffects = 1; + let mayStore = 0; + let mayLoad = 1; + let has_sbase = 0; + let has_offset = 0; +} + //===----------------------------------------------------------------------===// // Scalar Atomic Memory Classes //===----------------------------------------------------------------------===// @@ -197,6 +212,7 @@ let mayLoad = 1; let mayStore = 1; let has_glc = 1; + let has_dlc = 1; // Should these be set? let ScalarStore = 1; @@ -212,9 +228,9 @@ SM_Atomic_Pseudo { let offset_is_imm = isImm; let PseudoInstr = opName # !if(isImm, @@ -272,6 +288,7 @@ "s_buffer_load_dwordx16", SReg_128, SReg_512 >; +let SubtargetPredicate = HasScalarStores in { defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>; @@ -287,7 +304,7 @@ defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores < "s_buffer_store_dwordx4", SReg_128, SReg_128 >; - +} // End SubtargetPredicate = HasScalarStores def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>; def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>; @@ -297,15 +314,22 @@ } // let SubtargetPredicate = isGFX7GFX8GFX9 let SubtargetPredicate = isGFX8Plus in { +let OtherPredicates = [HasScalarStores] in { def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>; def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; +} // End OtherPredicates = [HasScalarStores] def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>; defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>; defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>; } // SubtargetPredicate = isGFX8Plus -let SubtargetPredicate = HasFlatScratchInsts, Uses = [FLAT_SCR] in { +let SubtargetPredicate = isGFX10Plus in { +def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">; +def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>; +} // End SubtargetPredicate = isGFX10Plus + +let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in { defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>; @@ -313,7 +337,7 @@ defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>; defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>; defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>; -} // SubtargetPredicate = HasFlatScratchInsts +} // SubtargetPredicate = HasScalarFlatScratchInsts let SubtargetPredicate = HasScalarAtomics in { @@ -375,7 +399,7 @@ } // let SubtargetPredicate = HasScalarAtomics -let SubtargetPredicate = isGFX9Only in { +let SubtargetPredicate = HasScalarAtomics in { defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">; 
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">; } @@ -411,13 +435,13 @@ SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { def _IMM_si : SMRD_Real_si { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc); } // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _SGPR_si : SMRD_Real_si { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); } } @@ -464,10 +488,10 @@ SM_Load_Pseudo immPs = !cast(ps#_IMM), SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { def _IMM_vi : SMEM_Real_vi { - let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc); + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_vi : SMEM_Real_vi { - let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); } } @@ -485,11 +509,11 @@ // FIXME: The operand name $offset is inconsistent with $soff used // in the pseudo def _IMM_vi : SMEM_Real_Store_vi { - let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc); + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); } def _SGPR_vi : SMEM_Real_Store_vi { - let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc); + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); } } @@ -638,7 +662,7 @@ let AssemblerPredicates = [isGFX7Only]; let DecoderNamespace = "GFX7"; - let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc); + let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc); let LGKM_CNT = ps.LGKM_CNT; let SMRD = ps.SMRD; @@ -718,26 +742,26 @@ // 1. IMM offset def : GCNPat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM") $sbase, $offset, 0)) + (vt (!cast(Instr#"_IMM") $sbase, $offset, 0, 0)) >; // 2. 32-bit IMM offset on CI def : GCNPat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0))> { + (vt (!cast(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> { let OtherPredicates = [isGFX7Only]; } // 3. SGPR offset def : GCNPat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), - (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0)) + (vt (!cast(Instr#"_SGPR") $sbase, $offset, 0, 0)) >; // 4. No offset def : GCNPat < (vt (smrd_load (i64 SReg_64:$sbase))), - (vt (!cast(Instr#"_IMM") i64:$sbase, 0, 0)) + (vt (!cast(Instr#"_IMM") i64:$sbase, 0, 0, 0)) >; } @@ -745,20 +769,20 @@ // 1. Offset as an immediate def : GCNPat < (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc), - (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc))) + (vt (!cast(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), 0)) >; // 2. 32-bit IMM offset on CI def : GCNPat < (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)), - (!cast(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc))> { + (!cast(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), 0)> { let OtherPredicates = [isGFX7Only]; } // 3. 
Offset loaded in an 32bit SGPR def : GCNPat < (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc), - (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc))) + (vt (!cast(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), 0)) >; } @@ -801,3 +825,198 @@ >; } // let OtherPredicates = [isGFX8Plus] + +//===----------------------------------------------------------------------===// +// GFX10. +//===----------------------------------------------------------------------===// + +class SMEM_Real_gfx10 op, SM_Pseudo ps> : + SM_Real, SIMCInstr, Enc64 { + bit glc; + bit dlc; + + let AssemblerPredicates = [isGFX10Plus]; + let DecoderNamespace = "GFX10"; + + let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); + let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); + let Inst{14} = !if(ps.has_dlc, dlc, ?); + let Inst{16} = !if(ps.has_glc, glc, ?); + let Inst{25-18} = op; + let Inst{31-26} = 0x3d; + let Inst{51-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{19-0}, ?), ?); + let Inst{63-57} = !if(ps.offset_is_imm, !cast(SGPR_NULL.HWEncoding), + !if(ps.has_offset, offset{6-0}, ?)); +} + +multiclass SM_Real_Loads_gfx10 op, string ps, + SM_Load_Pseudo immPs = !cast(ps#_IMM), + SM_Load_Pseudo sgprPs = !cast(ps#_SGPR)> { + def _IMM_gfx10 : SMEM_Real_gfx10 { + let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + } + def _SGPR_gfx10 : SMEM_Real_gfx10 { + let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + } +} + +class SMEM_Real_Store_gfx10 op, SM_Pseudo ps> : SMEM_Real_gfx10 { + bits<7> sdata; + + let sdst = ?; + let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); +} + +multiclass SM_Real_Stores_gfx10 op, string ps, + SM_Store_Pseudo immPs = !cast(ps#_IMM), + SM_Store_Pseudo sgprPs = !cast(ps#_SGPR)> { + // FIXME: The operand name $offset is inconsistent with $soff used + // in the pseudo + def _IMM_gfx10 : SMEM_Real_Store_gfx10 { + let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc, DLC:$dlc); + } + + def _SGPR_gfx10 : SMEM_Real_Store_gfx10 { + let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc); + } +} + +defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">; +defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">; +defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">; +defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">; +defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">; + +let SubtargetPredicate = HasScalarFlatScratchInsts in { +defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">; +defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">; +defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">; +} // End SubtargetPredicate = HasScalarFlatScratchInsts + +defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">; +defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">; +defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">; +defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">; +defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">; + +let SubtargetPredicate = HasScalarStores in { +defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">; +defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">; +defm S_STORE_DWORDX4 : 
SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">; +let OtherPredicates = [HasScalarFlatScratchInsts] in { +defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">; +defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">; +defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">; +} // End OtherPredicates = [HasScalarFlatScratchInsts] +defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">; +defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">; +defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">; +} // End SubtargetPredicate = HasScalarStores + +def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>; +def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>; +def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>; +def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>; +def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>; + +let SubtargetPredicate = HasScalarStores in { +def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>; +} // End SubtargetPredicate = HasScalarStores + +multiclass SM_Real_Probe_gfx10 op, string ps> { + def _IMM_gfx10 : SMEM_Real_Store_gfx10 (ps#_IMM)>; + def _SGPR_gfx10 : SMEM_Real_Store_gfx10 (ps#_SGPR)>; +} + +defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">; +defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">; + +class SMEM_Atomic_Real_gfx10 op, SM_Atomic_Pseudo ps> + : SMEM_Real_gfx10 { + + bits<7> sdata; + bit dlc; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + let glc = ps.glc; + + let Inst{14} = !if(ps.has_dlc, dlc, 0); + let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0}); +} + +multiclass SM_Real_Atomics_gfx10 op, string ps> { + def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM)>; + def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR)>; + def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_IMM_RTN)>; + def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 (ps#_SGPR_RTN)>; +} + +let SubtargetPredicate = HasScalarAtomics in { + +defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">; +defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">; +defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">; +defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">; +defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">; +defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">; +defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">; +defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">; +defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">; +defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">; +defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">; +defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">; +defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">; + +defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">; +defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">; +defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">; +defm S_BUFFER_ATOMIC_SUB_X2 : 
SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">; +defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">; +defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">; +defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">; +defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">; +defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">; +defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">; +defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">; +defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">; +defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">; + +defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">; +defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">; +defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">; +defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">; +defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">; +defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">; +defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">; +defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">; +defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">; +defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">; +defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">; +defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">; +defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">; + +defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">; +defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">; +defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">; +defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">; +defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">; +defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">; +defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">; +defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">; +defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">; +defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">; +defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">; +defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">; +defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">; + +multiclass SM_Real_Discard_gfx10 op, string ps> { + def _IMM_gfx10 : SMEM_Real_gfx10 (ps#_IMM)>; + def _SGPR_gfx10 : SMEM_Real_gfx10 (ps#_SGPR)>; +} + +defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">; +defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">; + +} // End SubtargetPredicate = HasScalarAtomics Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -18,7 +18,7 @@ ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 
0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:sgpr(p1) = COPY $sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir +++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir @@ -13,7 +13,7 @@ ; GCN-LABEL: name: implicit_def_s32 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = COPY $vgpr3_vgpr4 %1:vgpr(s32) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store 4, addrspace 1) @@ -30,7 +30,7 @@ ; GCN-LABEL: name: implicit_def_s64 ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORDX2 [[COPY]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = COPY $vgpr3_vgpr4 %1:vgpr(s64) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store 8, addrspace 1) @@ -60,7 +60,7 @@ ; GCN-LABEL: name: implicit_def_p1 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 G_STORE %1, %0 :: (store 4, addrspace 1) @@ -76,7 +76,7 @@ ; GCN-LABEL: name: implicit_def_p3 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p3) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 G_STORE %1, %0 :: (store 4, addrspace 1) @@ -92,7 +92,7 @@ ; GCN-LABEL: name: implicit_def_p4 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p4) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 G_STORE %1, %0 :: (store 4, addrspace 1) Index: llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir +++ llvm/trunk/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir @@ -8,9 +8,9 @@ body: | bb.0: ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1 - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 S_ENDPGM 0 ... 
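The updates in this test file are mechanical: every S_LOAD_*_IMM and S_STORE_*_IMM gains one trailing immediate for dlc, so the operand list now reads as follows (labels taken from the BuildMI comments in SIFrameLowering.cpp and SIRegisterInfo.cpp above):

  ;                         sbase           offset  glc  dlc
  $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0,      0,   0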
--- @@ -20,11 +20,11 @@ body: | bb.0: ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2 - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -34,13 +34,13 @@ body: | bb.0: ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3 - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0 - ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 + ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0 + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0 + ; GCN-NEXT: S_ENDPGM 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 + $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0 S_ENDPGM 0 ... --- @@ -50,15 +50,15 @@ body: | bb.0: ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4 - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0 - ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0 - ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 - $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 + ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0 + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0 + ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0 + ; GCN-NEXT: S_ENDPGM 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 + $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0 + $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0 S_ENDPGM 0 ... --- @@ -67,11 +67,11 @@ body: | bb.0: ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr - ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 S_ENDPGM 0 ... --- @@ -81,9 +81,9 @@ body: | bb.0: ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo - ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 S_ENDPGM 0 ... 
--- @@ -93,9 +93,9 @@ body: | bb.0: ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi - ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 S_ENDPGM 0 ... --- @@ -105,9 +105,9 @@ body: | bb.0: ; GCN-LABEL: name: smrd_load8_overwrite_ptr - ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 + $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 S_ENDPGM 0 ... --- @@ -119,46 +119,46 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4 - ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0 + ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0 ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28 ; GCN-NEXT: S_ENDPGM 0 - $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - - $sgpr17 = 
S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - - $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - - $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + + $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + + $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + + $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0 $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28 S_ENDPGM 0 ... @@ -169,12 +169,12 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr - ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -184,11 +184,11 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... 
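The XNACK-NEXT checks above capture why these operand updates must be exact: with XNACK enabled, a soft clause must be terminated with an S_NOP when a load in the clause overwrites a pointer register read within the clause (here the load's own address base), as in break_smem_clause_simple_load_smrd4_lo_ptr:

  $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0  ; result overlaps its own base
  S_NOP 0                                             ; clause break, XNACK targets only
  $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0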
--- @@ -198,12 +198,12 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr - ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 + ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 - $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -213,11 +213,11 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr - ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0 + ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 - $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0 + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 + $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0 S_ENDPGM 0 ... --- @@ -228,16 +228,16 @@ ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) - ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN: bb.1: ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 + ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 bb.0: - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0 + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0 bb.1: - $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0 + $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -248,11 +248,11 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4 - ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 + ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0 - $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0 + S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0 + $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0 S_ENDPGM 0 ... --- @@ -264,11 +264,11 @@ body: | bb.0: ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4 - ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0 - $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0 + $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... 
--- @@ -278,13 +278,13 @@ body: | bb.0: ; GCN-LABEL: name: valu_inst_breaks_smem_clause - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec - ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 $vgpr8 = V_MOV_B32_e32 0, implicit $exec - $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -294,13 +294,13 @@ body: | bb.0: ; GCN-LABEL: name: salu_inst_breaks_smem_clause - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 - ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 $sgpr8 = S_MOV_B32 0 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -309,13 +309,13 @@ body: | bb.0: ; GCN-LABEL: name: ds_inst_breaks_smem_clause - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec - ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec - $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... --- @@ -325,13 +325,13 @@ body: | bb.0: ; GCN-LABEL: name: flat_inst_breaks_smem_clause - ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 - ; GCN-NEXT: S_ENDPGM 0 - $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0 + ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 + ; GCN-NEXT: S_ENDPGM 0 + $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0 + $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0 S_ENDPGM 0 ... 
--- @@ -341,11 +341,11 @@ body: | bb.0: ; GCN-LABEL: name: implicit_use_breaks_smem_clause - ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13 + ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13 ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0 + ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0 ; GCN-NEXT: S_ENDPGM 0 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13 - $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0 + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13 + $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0 S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir +++ llvm/trunk/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir @@ -7,10 +7,10 @@ body: | bb.0: ; GCN-LABEL: name: trivial_clause_load_flat4_x1 - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -20,12 +20,12 @@ body: | bb.0: ; GCN-LABEL: name: trivial_clause_load_flat4_x2 - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -35,14 +35,14 @@ body: | bb.0: ; GCN-LABEL: name: trivial_clause_load_flat4_x3 - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -52,16 +52,16 @@ body: | bb.0: ; GCN-LABEL: name: trivial_clause_load_flat4_x4 - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -71,12 +71,12 @@ body: | bb.0: ; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -86,10 +86,10 @@ body: | bb.0: ; GCN-LABEL: name: flat_load4_overwrite_ptr_lo - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -99,10 +99,10 @@ body: | bb.0: ; GCN-LABEL: name: flat_load4_overwrite_ptr_hi - ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -112,10 +112,10 @@ body: | bb.0: ; GCN-LABEL: name: flat_load8_overwrite_ptr - ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -128,48 +128,48 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4 - ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, 
implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18 ; GCN-NEXT: S_ENDPGM 0 - $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - - $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - - $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - - $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit 
$vgpr17, implicit $vgpr18 S_ENDPGM 0 ... @@ -180,13 +180,13 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -196,13 +196,13 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -212,13 +212,13 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_simple_load_flat8_ptr - ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -229,12 +229,12 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_simple_load_flat16_ptr - ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -249,17 +249,17 @@ ; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000) - ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN: bb.1: ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 bb.0: - $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr bb.1: - $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -270,12 +270,12 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4 - ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -287,12 +287,12 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_store_load_into_data_flat4 - ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -303,15 +303,15 @@ body: | bb.0: ; GCN-LABEL: name: valu_inst_breaks_clause - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr8 = V_MOV_B32_e32 0, implicit $exec - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -322,15 +322,15 @@ body: | bb.0: ; GCN-LABEL: name: salu_inst_breaks_clause - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $sgpr8 = S_MOV_B32 0 ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $sgpr8 = S_MOV_B32 0 - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -340,15 +340,15 @@ body: | bb.0: ; GCN-LABEL: name: ds_inst_breaks_clause - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -358,14 +358,14 @@ body: | bb.0: ; GCN-LABEL: name: smrd_inst_breaks_clause - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0 - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0 + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -375,13 +375,13 @@ body: | bb.0: ; GCN-LABEL: name: implicit_use_breaks_clause - ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5 + ; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5 ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5 - $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5 + $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
--- @@ -390,12 +390,12 @@ body: | bb.0: ; GCN-LABEL: name: trivial_clause_load_mubuf4_x2 - ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec + $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... --- @@ -404,13 +404,13 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr - ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec + $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
--- @@ -421,11 +421,11 @@ body: | bb.0: ; GCN-LABEL: name: mubuf_load4_overwrite_ptr - ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec $vgpr1 = V_MOV_B32_e32 0, implicit $exec $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec S_ENDPGM 0 @@ -438,13 +438,13 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_flat_load_mubuf_load - ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... # Break a clause from interference between mubuf and flat instructions @@ -459,8 +459,8 @@ body: | bb.0: - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec + $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... @@ -471,12 +471,12 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4 - ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 - $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
@@ -487,11 +487,11 @@ bb.0: ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4 ; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; GCN-NEXT: S_ENDPGM 0 FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr - $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -501,12 +501,12 @@ body: | bb.0: ; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4 - ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; XNACK-NEXT: S_NOP 0 ; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -518,11 +518,11 @@ bb.0: ; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4 ; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec - ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... --- @@ -533,11 +533,11 @@ body: | bb.0: ; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr - ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec ; GCN-NEXT: S_ENDPGM 0 - $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec - $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec + $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
--- @@ -547,16 +547,16 @@ body: | bb.0: ; GCN-LABEL: name: mix_load_store_clause - ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr - FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -566,15 +566,15 @@ body: | bb.0: ; GCN-LABEL: name: mix_load_store_clause_same_address - ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ; XNACK-NEXT: S_NOP 0 - ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr - ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr - FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr - FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ llvm/trunk/test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -43,8 +43,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -55,10 +55,10 @@ %26 = V_LSHL_B64 killed %25, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 - %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec - BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -105,8 +105,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -117,10 +117,10 @@ %26 = V_LSHL_B64 killed %25, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 - %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec - BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
--- @@ -168,8 +168,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -180,10 +180,10 @@ %26 = V_LSHL_B64 killed %25, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 - %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec - BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -233,8 +233,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -245,10 +245,10 @@ %26 = V_LSHL_B64 killed %25, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 - %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec - BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -310,8 +310,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -322,10 +322,10 @@ %26 = V_LSHL_B64 killed %25, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 - %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec - BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -375,8 +375,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec %25 = REG_SEQUENCE %3, 1, %24, 2 %10 = S_MOV_B32 61440 @@ -387,10 +387,10 @@ %26 = V_LSHL_B64 killed %25, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 %18 = COPY %26 - %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec - BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir +++ llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads-postra.mir @@ -17,15 +17,15 @@ $vgpr0_vgpr1 = IMPLICIT_DEF $vgpr4_vgpr5 = IMPLICIT_DEF - $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + $vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) $vgpr2 = IMPLICIT_DEF $vgpr3 = IMPLICIT_DEF $vgpr6 = IMPLICIT_DEF $vgpr0 = V_ADD_I32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec $vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec - FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) - FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir +++ llvm/trunk/test/CodeGen/AMDGPU/cluster-flat-loads.mir @@ -14,7 +14,7 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec - %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir @@ -30,7 +30,7 @@ %14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec %15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec - BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4) + BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4) S_ENDPGM 0 bb.2: @@ -78,7 +78,7 @@ bb.8: successors: %bb.10 - %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) + %31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) %34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec %35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec %28:vreg_1 = COPY %35 Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-copymi-not-live.mir @@ -83,7 +83,7 @@ bb.9: successors: %bb.10(0x80000000) - %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + %19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) %21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec %22:sreg_64 = COPY $exec, implicit-def $exec %23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir @@ -68,7 +68,7 @@ %23:vreg_128 = COPY killed %17 %24:sreg_64 = COPY killed %16 %25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec - %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + %26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) %28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec %29:vreg_128 = COPY killed %21 %29.sub0:vreg_128 = COPY %1 Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -46,10 +46,10 @@ %0 = COPY 
$sgpr2_sgpr3 %1 = COPY $vgpr2 %2 = COPY $vgpr3 - %3 = S_LOAD_DWORDX8_IMM %0, 0, 0 - %4 = S_LOAD_DWORDX4_IMM %0, 12, 0 - %5 = S_LOAD_DWORDX8_IMM %0, 16, 0 - %6 = S_LOAD_DWORDX4_IMM %0, 28, 0 + %3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0 + %4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0 + %5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0 + %6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0 undef %7.sub0 = S_MOV_B32 212739 %20 = COPY %7 %11 = COPY %20 Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir @@ -11,7 +11,7 @@ # # GCN-LABEL: bb.6: # GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}}) -# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec +# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, implicit $exec # --- | @@ -69,7 +69,7 @@ %10:sreg_64 = COPY killed %5 undef %11.sub2:sreg_128 = COPY %4 %11.sub3:sreg_128 = COPY %3 - %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec + %12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, implicit $exec undef %13.sub1:vreg_128 = COPY %9.sub1 %13.sub2:vreg_128 = COPY %9.sub2 %14:sreg_64 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $exec @@ -161,7 +161,7 @@ bb.18: successors: %bb.7(0x80000000) dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec - dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec + dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, implicit $exec undef %66.sub1:vreg_128 = COPY %13.sub1 %66.sub2:vreg_128 = COPY %13.sub2 %67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir +++ llvm/trunk/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir @@ -145,10 +145,10 @@ %40:vgpr_32 = V_MAD_F32 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $exec %41:vgpr_32 = V_MAD_F32 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $exec %42:vgpr_32 = V_CVT_I32_F32_e32 killed %41, implicit $exec - %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0 :: (dereferenceable invariant load 4) + %43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0, 0 :: (dereferenceable invariant load 4) %45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec %46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec - %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) + %47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) %49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec %50:sreg_64 = COPY $exec, implicit-def $exec %51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc Index: llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir 
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir +++ llvm/trunk/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -25,7 +25,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -33,7 +33,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] @@ -44,7 +44,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: DBG_VALUE @@ -72,7 +72,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -80,7 +80,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -92,7 +92,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -133,7 +133,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: 
(dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -141,7 +141,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] @@ -152,7 +152,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: @@ -180,7 +180,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -188,7 +188,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -200,7 +200,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -241,7 +241,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -249,7 +249,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 
0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] @@ -260,7 +260,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: @@ -289,7 +289,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -297,7 +297,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -309,7 +309,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -350,7 +350,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -358,7 +358,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc @@ -370,7 +370,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 
= COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF @@ -400,7 +400,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -408,7 +408,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -420,7 +420,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: %15:sgpr_32 = IMPLICIT_DEF @@ -463,7 +463,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -471,7 +471,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]] @@ -482,7 +482,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF @@ -512,7 +512,7 @@ bb.1: successors: %bb.2, %bb.3 - undef 
%5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -520,7 +520,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -532,7 +532,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -575,7 +575,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -583,7 +583,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc @@ -595,7 +595,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc @@ -623,7 +623,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -631,7 +631,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec 
= V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -643,7 +643,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -683,7 +683,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -691,7 +691,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc @@ -703,7 +703,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc @@ -731,7 +731,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -739,7 +739,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = 
S_AND_B64 %12, %11, implicit-def dead $scc @@ -751,7 +751,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc @@ -791,7 +791,7 @@ ; GCN: S_BRANCH %bb.1 ; GCN: bb.1: ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + ; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) ; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec ; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1 @@ -799,7 +799,7 @@ ; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec ; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440 ; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0 - ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec ; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc @@ -811,7 +811,7 @@ ; GCN: %5.sub0:sgpr_128 = COPY %5.sub2 ; GCN: %5.sub1:sgpr_128 = COPY %5.sub2 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; GCN: bb.3: ; GCN: successors: %bb.5(0x80000000) ; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc @@ -842,7 +842,7 @@ bb.1: successors: %bb.2, %bb.3 - undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) + undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4) undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec %7:vgpr_32 = COPY %5.sub1 @@ -850,7 +850,7 @@ %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec %5.sub3:sgpr_128 = S_MOV_B32 61440 %5.sub2:sgpr_128 = S_MOV_B32 0 - BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec %12:sreg_64 = COPY $exec, implicit-def $exec %13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc @@ -862,7 +862,7 @@ %5.sub0:sgpr_128 = COPY %5.sub2 %5.sub1:sgpr_128 = COPY %5.sub2 %14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) bb.3: $exec = S_OR_B64 $exec, %12, implicit-def $scc Index: 
llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ llvm/trunk/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -44,7 +44,7 @@ liveins: $sgpr0_sgpr1 %0 = COPY $sgpr0_sgpr1 - %1 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %1 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %2 = COPY %1.sub1 %3 = COPY %1.sub0 %4 = S_MOV_B32 61440 @@ -54,7 +54,7 @@ %8 = S_MOV_B32 9999 %9 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc %10 = COPY %9 - BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -130,7 +130,7 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %31 = V_ASHRREV_I32_e64 31, %3, implicit $exec %32 = REG_SEQUENCE %3, 1, %31, 2 %33 = V_LSHLREV_B64 2, killed %32, implicit $exec @@ -144,19 +144,19 @@ %34 = V_MOV_B32_e32 63, implicit $exec %27 = V_AND_B32_e64 %26, %24, implicit $exec - FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %37, %27, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %28 = V_AND_B32_e64 %24, %26, implicit $exec - FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %37, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %29 = V_AND_B32_e32 %26, %24, implicit $exec - FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %37, %29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %30 = V_AND_B32_e64 %26, %26, implicit $exec - FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %37, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %31 = V_AND_B32_e64 %34, %34, implicit $exec - FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %37, %31, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -210,7 +210,7 @@ liveins: $sgpr0_sgpr1 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %5 = S_MOV_B32 1 %6 = COPY %4.sub1 %7 = COPY %4.sub0 @@ -219,7 +219,7 @@ %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4 %12 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc %13 = COPY %12 - BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -316,7 +316,7 @@ %2 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec %16 = REG_SEQUENCE %2, 1, %15, 2 %17 = V_LSHLREV_B64 2, killed %16, implicit $exec @@ -332,34 +332,34 @@ %27 = S_MOV_B32 -4 %11 = V_LSHLREV_B32_e64 12, %10, implicit $exec - FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %12 = V_LSHLREV_B32_e64 %7, 12, implicit $exec - FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %13 = V_LSHL_B32_e64 %7, 12, implicit $exec - FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %14 = V_LSHL_B32_e64 12, %7, implicit $exec - FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %15 = V_LSHL_B32_e64 12, %24, implicit $exec - FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %22 = V_LSHL_B32_e64 %6, 12, implicit $exec - FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %23 = V_LSHL_B32_e64 %6, 32, implicit $exec - FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %25 = V_LSHL_B32_e32 %6, %6, implicit $exec - FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %26 = V_LSHLREV_B32_e32 11, %24, implicit $exec - FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %28 = V_LSHL_B32_e32 %27, %6, implicit $exec - FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -410,7 +410,7 @@ liveins: $sgpr0_sgpr1 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %5 = S_MOV_B32 999123 %6 = COPY %4.sub1 %7 = COPY %4.sub0 @@ -419,7 +419,7 @@ %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4 %12 = S_ASHR_I32 killed %5, 12, implicit-def dead $scc %13 = COPY %12 - BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -519,7 +519,7 @@ %2 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec %16 = REG_SEQUENCE %2, 1, %15, 2 %17 = V_LSHLREV_B64 2, killed %16, implicit $exec @@ -540,34 +540,34 @@ %35 = V_MOV_B32_e32 2, implicit $exec %11 = V_ASHRREV_I32_e64 8, %10, implicit $exec - FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %12 = V_ASHRREV_I32_e64 %8, %10, implicit $exec - FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %13 = V_ASHR_I32_e64 %7, 3, implicit $exec - FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %14 = V_ASHR_I32_e64 7, %32, implicit $exec - FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %15 = V_ASHR_I32_e64 %27, %24, implicit $exec - FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %22 = V_ASHR_I32_e64 %6, 4, implicit $exec - FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %23 = V_ASHR_I32_e64 %6, %33, implicit $exec - FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %25 = V_ASHR_I32_e32 %34, %34, implicit $exec - FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %26 = V_ASHRREV_I32_e32 11, %10, implicit $exec - FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %28 = V_ASHR_I32_e32 %27, %35, implicit $exec - FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -618,7 +618,7 @@ liveins: $sgpr0_sgpr1 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %5 = S_MOV_B32 -999123 %6 = COPY %4.sub1 %7 = COPY %4.sub0 @@ -627,7 +627,7 @@ %10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4 %12 = S_LSHR_B32 killed %5, 12, implicit-def dead $scc %13 = COPY %12 - BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -728,7 +728,7 @@ %2 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %3 = S_LOAD_DWORDX2_IMM %0, 36, 0 + %3 = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %15 = V_ASHRREV_I32_e64 31, %2, implicit $exec %16 = REG_SEQUENCE %2, 1, %15, 2 %17 = V_LSHLREV_B64 2, killed %16, implicit $exec @@ -749,34 +749,34 @@ %35 = V_MOV_B32_e32 2, implicit $exec %11 = V_LSHRREV_B32_e64 8, %10, implicit $exec - FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %12 = V_LSHRREV_B32_e64 %8, %10, implicit $exec - FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %13 = V_LSHR_B32_e64 %7, 3, implicit $exec - FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %14 = V_LSHR_B32_e64 7, %32, implicit $exec - FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %15 = V_LSHR_B32_e64 %27, %24, implicit $exec - FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %22 = V_LSHR_B32_e64 %6, 4, implicit $exec - FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %23 = V_LSHR_B32_e64 %6, %33, implicit $exec - FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %25 = V_LSHR_B32_e32 %34, %34, implicit $exec - FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %26 = V_LSHRREV_B32_e32 11, %10, implicit $exec - FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %28 = V_LSHR_B32_e32 %27, %35, implicit $exec - FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 @@ -800,7 +800,7 @@ bb.0: %0 = V_MOV_B32_e32 0, implicit $exec %2 = V_XOR_B32_e64 killed %0, undef %1, implicit $exec - FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD undef %3, %2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... 
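All of the .mir churn above follows one mechanical pattern: every VMEM/SMEM test instruction (FLAT_*, BUFFER_*, S_LOAD_*) gains one extra immediate 0, the new GFX10 DLC cache-control bit, placed with the other cache bits ahead of the implicit operands. A minimal sketch of that rewrite for the FLAT_* lines, assuming a regex-based updater rather than the actual script used for this commit:

#include <iostream>
#include <regex>
#include <string>

// Illustrative only: models the mechanical rewrite applied to the FLAT_*
// lines in these MIR tests -- insert one more "0" immediate (the new DLC
// field) in front of "implicit $exec". Not the script used for the commit.
static std::string addDlcOperand(const std::string &MirLine) {
  static const std::regex FlatTail(R"((FLAT_\w+ [^;]*?), implicit \$exec)");
  return std::regex_replace(MirLine, FlatTail, "$1, 0, implicit $$exec");
}

int main() {
  std::cout << addDlcOperand("FLAT_STORE_DWORD %20, %28, 0, 0, 0, "
                             "implicit $exec, implicit $flat_scr")
            << '\n'; // ... 0, 0, 0, 0, implicit $exec, implicit $flat_scr
}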
Index: llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir +++ llvm/trunk/test/CodeGen/AMDGPU/couldnt-join-subrange-3.mir @@ -291,7 +291,7 @@ bb.3..lr.ph3410.preheader: successors: %bb.4(0x80000000) - dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) + dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4) dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec %36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir +++ llvm/trunk/test/CodeGen/AMDGPU/dce-disjoint-intervals.mir @@ -11,7 +11,7 @@ bb.0: liveins: $sgpr0_sgpr1 - %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0 + %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0 S_NOP 0, implicit-def %4:sreg_128, implicit %10.sub1:sreg_128 S_CBRANCH_SCC0 %bb.3, implicit undef $scc S_BRANCH %bb.1 @@ -26,7 +26,7 @@ S_BRANCH %bb.4 bb.3: - %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0 + %10:sreg_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0 %7:sreg_32_xm0 = COPY %10.sub1:sreg_128 %8:sreg_32_xm0 = COPY %10.sub2:sreg_128 Index: llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir +++ llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir @@ -12,7 +12,7 @@ %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1 - FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir +++ llvm/trunk/test/CodeGen/AMDGPU/dead-mi-use-same-intr.mir @@ -50,6 +50,6 @@ %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF - GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %2, 0, 0, 0, 0, implicit $exec dead %2:vgpr_32 = V_MAC_F32_e32 %0:vgpr_32, %1:vgpr_32, %2:vgpr_32, implicit $exec S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir +++ llvm/trunk/test/CodeGen/AMDGPU/dead_copy.mir @@ -23,5 +23,5 @@ $vgpr10 = COPY killed $sgpr14, implicit $exec $vgpr11 = COPY killed $sgpr15, implicit $exec - FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX4 $vgpr10_vgpr11, $vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir +++ llvm/trunk/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir @@ -71,7 +71,7 @@ ; CHECK: dead %26:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF4]], 0, [[DEF1]], 0, 0, implicit $exec ; CHECK: dead %27:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF5]], 0, [[DEF2]], 0, 0, implicit $exec ; CHECK: dead %28:vgpr_32 = V_MAD_F32 0, [[V_MAC_F32_e32_]], 0, [[DEF6]], 0, [[DEF3]], 0, 0, implicit $exec - ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, implicit $exec + ; CHECK: GLOBAL_STORE_DWORD [[DEF]], [[DEF10]], 0, 0, 0, 0, implicit $exec ; CHECK: S_ENDPGM 0 bb.0: successors: %bb.1 @@ -129,7 +129,7 @@ %26:vgpr_32 = V_MAD_F32 0, %25, 0, %4, 0, %1, 0, 0, implicit $exec %27:vgpr_32 = V_MAD_F32 0, %25, 0, %5, 0, %2, 0, 0, implicit $exec %28:vgpr_32 = V_MAD_F32 0, %25, 0, %6, 0, %3, 0, 0, implicit $exec - GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir +++ llvm/trunk/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -17,7 +17,7 @@ %0 = IMPLICIT_DEF %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 @@ -25,7 +25,7 @@ --- # GCN-LABEL: name: load_without_memoperand # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc -# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr +# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr # GCN-NEXT: S_ENDPGM 0 name: load_without_memoperand tracksRegLiveness: true @@ -41,7 +41,7 @@ %0 = IMPLICIT_DEF %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr + %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 @@ -49,7 +49,7 @@ --- # GCN-LABEL: name: load_volatile # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc -# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) +# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) # GCN-NEXT: S_ENDPGM 0 name: load_volatile tracksRegLiveness: true @@ -65,7 +65,7 @@ %0 = IMPLICIT_DEF %3 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) + %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4) %2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $exec %4 = S_ADD_U32 %3, 1, implicit-def $scc S_ENDPGM 0 @@ -73,7 +73,7 @@ --- # GCN-LABEL: name: store # GCN: $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc -# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, 
implicit $exec, implicit $flat_scr :: (store 4) +# GCN-NEXT: FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) # GCN-NEXT: S_ENDPGM 0 name: store tracksRegLiveness: true body: | @@ -86,7 +86,7 @@ %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc - FLAT_STORE_DWORD %0, %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) S_ENDPGM 0 ... --- Index: llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir +++ llvm/trunk/test/CodeGen/AMDGPU/flat-load-clustering.mir @@ -54,24 +54,24 @@ %1 = COPY $sgpr4_sgpr5 %0 = COPY $vgpr0 - %3 = S_LOAD_DWORDX2_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %4 = S_LOAD_DWORDX2_IMM %1, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %3 = S_LOAD_DWORDX2_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %1, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %7 = V_LSHLREV_B32_e32 2, %0, implicit $exec %2 = V_MOV_B32_e32 0, implicit $exec undef %12.sub0 = V_ADD_I32_e32 %4.sub0, %7, implicit-def $vcc, implicit $exec %11 = COPY %4.sub1 %12.sub1 = V_ADDC_U32_e32 %11, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1) + %5 = FLAT_LOAD_DWORD %12, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep1) undef %9.sub0 = V_ADD_I32_e32 %3.sub0, %7, implicit-def $vcc, implicit $exec %8 = COPY %3.sub1 %9.sub1 = V_ADDC_U32_e32 %8, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec undef %13.sub0 = V_ADD_I32_e32 16, %12.sub0, implicit-def $vcc, implicit $exec %13.sub1 = V_ADDC_U32_e32 %12.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34) + %6 = FLAT_LOAD_DWORD %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.gep34) undef %10.sub0 = V_ADD_I32_e32 16, %9.sub0, implicit-def $vcc, implicit $exec %10.sub1 = V_ADDC_U32_e32 %9.sub1, %2, implicit-def dead $vcc, implicit killed $vcc, implicit $exec - FLAT_STORE_DWORD %9, %5, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2) - FLAT_STORE_DWORD %10, %6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4) + FLAT_STORE_DWORD %9, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep2) + FLAT_STORE_DWORD %10, %6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.gep4) S_ENDPGM 0 ...
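The new test below (flat-offset-bug.ll) captures the behavioral side of the change for gfx1010: immediate offsets still fold into global accesses on both targets, but the GFX10 check lines expect the bare addressing form for flat accesses. A toy predicate summarizing what the CHECK lines enforce; the helper name and the exact bit-widths are assumptions for illustration, not the in-tree API:

#include <cassert>
#include <cstdint>

enum class AddrSpace { Flat, Global };

// Toy model of the offset-folding policy the test pins down.
static bool canFoldImmOffset(bool IsGfx10, AddrSpace AS, int64_t Off) {
  if (AS == AddrSpace::Global)          // folded on both targets below
    return Off >= -2048 && Off <= 2047; // assumed signed 12-bit window
  // Flat segment: folded on gfx900 only; gfx1010 keeps the bare form.
  return !IsGfx10 && Off >= 0 && Off <= 4095; // assumed unsigned 12-bit
}

int main() {
  assert(canFoldImmOffset(false, AddrSpace::Flat, 4));  // GFX9: offset:4
  assert(!canFoldImmOffset(true, AddrSpace::Flat, 4));  // GFX10: {{$}}
  assert(canFoldImmOffset(true, AddrSpace::Global, 4)); // GCN: offset:4
}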
Index: llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll +++ llvm/trunk/test/CodeGen/AMDGPU/flat-offset-bug.ll @@ -0,0 +1,85 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s + +; GCN-LABEL: flat_inst_offset: +; GFX9: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:4 +; GFX9: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}} offset:4 +; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}} +; GFX10: flat_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}{{$}} +define void @flat_inst_offset(i32* nocapture %p) { + %gep = getelementptr inbounds i32, i32* %p, i64 1 + %load = load i32, i32* %gep, align 4 + %inc = add nsw i32 %load, 1 + store i32 %inc, i32* %gep, align 4 + ret void +} + +; GCN-LABEL: global_inst_offset: +; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off offset:4 +; GCN: global_store_dword v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:4 +define void @global_inst_offset(i32 addrspace(1)* nocapture %p) { + %gep = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 1 + %load = load i32, i32 addrspace(1)* %gep, align 4 + %inc = add nsw i32 %load, 1 + store i32 %inc, i32 addrspace(1)* %gep, align 4 + ret void +} + +; GCN-LABEL: load_i16_lo: +; GFX9: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}} +; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @load_i16_lo(i16* %arg, <2 x i16>* %out) { + %gep = getelementptr inbounds i16, i16* %arg, i32 4 + %ld = load i16, i16* %gep, align 2 + %vec = insertelement <2 x i16> <i16 0, i16 0>, i16 %ld, i32 0 + %v = add <2 x i16> %vec, %vec + store <2 x i16> %v, <2 x i16>* %out, align 4 + ret void +} + +; GCN-LABEL: load_i16_hi: +; GFX9: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}} +; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @load_i16_hi(i16* %arg, <2 x i16>* %out) { + %gep = getelementptr inbounds i16, i16* %arg, i32 4 + %ld = load i16, i16* %gep, align 2 + %vec = insertelement <2 x i16> <i16 0, i16 0>, i16 %ld, i32 1 + %v = add <2 x i16> %vec, %vec + store <2 x i16> %v, <2 x i16>* %out, align 4 + ret void +} + +; GCN-LABEL: load_half_lo: +; GFX9: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}} +; GFX10: flat_load_short_d16 v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @load_half_lo(half* %arg, <2 x half>* %out) { + %gep = getelementptr inbounds half, half* %arg, i32 4 + %ld = load half, half* %gep, align 2 + %vec = insertelement <2 x half> <half 0.0, half 0.0>, half %ld, i32 0 + %v = fadd <2 x half> %vec, %vec + store <2 x half> %v, <2 x half>* %out, align 4 + ret void +} + +; GCN-LABEL: load_half_hi: +; GFX9: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}] offset:8{{$}} +; GFX10: flat_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @load_half_hi(half* %arg, <2 x half>* %out) { + %gep = getelementptr inbounds half, half* %arg, i32 4 + %ld = load half, half* %gep, align 2 + %vec = insertelement <2 x half> <half 0.0, half 0.0>, half %ld, i32 1 + %v = fadd <2 x half> %vec, %vec + store <2 x half> %v, <2 x half>* %out, align 4 + ret void +} + +; GCN-LABEL: load_float_lo: +; GFX9: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}] offset:16{{$}} +; GFX10: flat_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}]{{$}} +define amdgpu_kernel void @load_float_lo(float* %arg, float* %out) {
+ %gep = getelementptr inbounds float, float* %arg, i32 4 + %ld = load float, float* %gep, align 4 + %v = fadd float %ld, %ld + store float %v, float* %out, align 4 + ret void +} Index: llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-imm-copy.mir @@ -10,12 +10,12 @@ liveins: $vgpr0, $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 %1:sgpr_64 = COPY $sgpr0_sgpr1 - %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0 + %2:sreg_128 = S_LOAD_DWORDX4_IMM %1, 9, 0, 0 %3:sreg_32_xm0 = S_MOV_B32 2 %4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec %6:vreg_64 = REG_SEQUENCE killed %4, %subreg.sub0, killed %5, %subreg.sub1 - %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, implicit $exec + %7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, implicit $exec %8:sreg_32_xm0 = S_MOV_B32 65535 %9:vgpr_32 = COPY %8 %10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir @@ -158,10 +158,10 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %12 = V_MOV_B32_e32 1065353216, implicit $exec %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 ... @@ -222,13 +222,13 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 1065353216, implicit $exec %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 ... 
@@ -289,14 +289,14 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 1065353216, implicit $exec %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) S_ENDPGM 0 ... @@ -360,16 +360,16 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 1065353216, implicit $exec %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) S_ENDPGM 0 ... 
@@ -427,13 +427,13 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 1, implicit $exec %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 ... @@ -494,16 +494,16 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %14 = V_MOV_B32_e32 -2, implicit $exec %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) S_ENDPGM 0 ... 
@@ -564,13 +564,13 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) %13 = V_MOV_B32_e32 15360, implicit $exec %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) S_ENDPGM 0 ... @@ -631,13 +631,13 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = V_MOV_B32_e32 80886784, implicit $exec %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec - BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 ... 
@@ -697,13 +697,13 @@ %8 = S_MOV_B32 61440 %9 = S_MOV_B32 -1 %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 - %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) + %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`) %13 = V_MOV_B32_e32 305413120, implicit $exec %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) - BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir @@ -46,9 +46,9 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 - %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 + %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0 %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec %28 = REG_SEQUENCE %3, 1, %27, 2 %11 = S_MOV_B32 61440 @@ -60,13 +60,13 @@ %17 = REG_SEQUENCE killed %6, 17, %13, 18 %18 = REG_SEQUENCE killed %4, 17, %13, 18 %20 = COPY %29 - %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec %22 = COPY %29 - %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec + %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec %26 = COPY %29 - BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -117,9 +117,9 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 - %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 + %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0 %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec %28 = REG_SEQUENCE %3, 1, %27, 2 %11 = S_MOV_B32 61440 @@ -131,13 +131,13 @@ %17 = REG_SEQUENCE killed %6, 17, %13, 18 %18 = REG_SEQUENCE killed %4, 17, %13, 18 %20 = COPY %29 - %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec %22 = COPY %29 - %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec + %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec %26 = COPY %29 - BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -188,9 +188,9 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 - %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 + %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0 %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec %28 = REG_SEQUENCE %3, 1, %27, 2 %11 = S_MOV_B32 61440 @@ -202,13 +202,13 @@ %17 = REG_SEQUENCE killed %6, 17, %13, 18 %18 = REG_SEQUENCE killed %4, 17, %13, 18 %20 = COPY %29 - %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec %22 = COPY %29 - %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec + %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec %26 = COPY %29 - BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -259,9 +259,9 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 - %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 + %6 = S_LOAD_DWORDX2_IMM %0, 13, 0, 0 %27 = V_ASHRREV_I32_e32 31, %3, implicit $exec %28 = REG_SEQUENCE %3, 1, %27, 2 %11 = S_MOV_B32 61440 @@ -273,13 +273,13 @@ %17 = REG_SEQUENCE killed %6, 17, %13, 18 %18 = REG_SEQUENCE killed %4, 17, %13, 18 %20 = COPY %29 - %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec %22 = COPY %29 - %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit $exec + %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec %23 = V_MOV_B32_e32 1090519040, implicit $exec %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec %26 = COPY %29 - BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
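The test updates above all make the same mechanical change: every non-atomic MUBUF load/store and every SMEM load gains one extra immediate operand, the new GFX10 DLC cache-control bit, hard-wired to 0 in these tests. A minimal annotated sketch of the updated operand order; the operand labels are inferred from the patterns in this patch and are reading aids, not checked MIR syntax:

    ; MUBUF (BUFFER_*_ADDR64): vaddr, srsrc, soffset, offset, glc, slc, tfe, dlc
    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
    ; SMEM (S_LOAD_*_IMM): sbase, offset, glc, dlc
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0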
Index: llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir +++ llvm/trunk/test/CodeGen/AMDGPU/fold-multiple.mir @@ -34,7 +34,7 @@ %3 = S_LSHL_B32 %1, killed %1, implicit-def dead $scc %4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec %5 = IMPLICIT_DEF - BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -93,7 +93,7 @@ %1:sgpr_64 = COPY $sgpr0_sgpr1 %0:vgpr_32 = COPY $vgpr0 - %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 36, 0 :: (dereferenceable invariant load 8 ) + %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1, 36, 0, 0 :: (dereferenceable invariant load 8 ) %5:sreg_32_xm0 = S_MOV_B32 2 %6:vgpr_32 = V_LSHLREV_B32_e64 killed %5, %0, implicit $exec %7:sreg_32_xm0 = S_MOV_B32 0 @@ -109,140 +109,140 @@ %16:vreg_64 = REG_SEQUENCE %17, %subreg.sub0, %18, %subreg.sub1 %11:vreg_64 = COPY %16 - %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_DWORD %11, %10, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %40:vreg_64 = GLOBAL_LOAD_DWORDX2 %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_DWORDX2 %11, %40, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %41:vreg_96 = GLOBAL_LOAD_DWORDX3 %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_DWORDX3 %11, %41, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %42:vreg_128 = GLOBAL_LOAD_DWORDX4 %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_DWORDX4 %11, %42, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %43:vgpr_32 = GLOBAL_LOAD_SSHORT %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_SHORT %11, %43, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %44:vgpr_32 = GLOBAL_LOAD_USHORT %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_SHORT %11, %44, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %45:vgpr_32 = GLOBAL_LOAD_UBYTE %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_BYTE %11, %45, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %46:vgpr_32 = GLOBAL_LOAD_SBYTE %11, 16, 0, 0, implicit $exec :: (load 4) - GLOBAL_STORE_BYTE %11, %46, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %47:vgpr_32 = GLOBAL_LOAD_SBYTE_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4) - GLOBAL_STORE_BYTE_D16_HI %11, %47, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %48:vgpr_32 = GLOBAL_LOAD_UBYTE_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4) - GLOBAL_STORE_BYTE_D16_HI %11, %48, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %49:vgpr_32 = GLOBAL_LOAD_SBYTE_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4) - GLOBAL_STORE_BYTE_D16_HI %11, %49, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %50:vgpr_32 = 
GLOBAL_LOAD_UBYTE_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4) - GLOBAL_STORE_BYTE_D16_HI %11, %50, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %51:vgpr_32 = GLOBAL_LOAD_SHORT_D16_HI %11, 16, 0, 0, %46, implicit $exec :: (load 4) - GLOBAL_STORE_SHORT_D16_HI %11, %51, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) - %52:vgpr_32 = GLOBAL_LOAD_SHORT_D16 %11, 16, 0, 0, %46, implicit $exec :: (load 4) - GLOBAL_STORE_SHORT_D16_HI %11, %52, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_DWORD %11, %10, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %40:vreg_64 = GLOBAL_LOAD_DWORDX2 %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_DWORDX2 %11, %40, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %41:vreg_96 = GLOBAL_LOAD_DWORDX3 %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_DWORDX3 %11, %41, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %42:vreg_128 = GLOBAL_LOAD_DWORDX4 %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_DWORDX4 %11, %42, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %43:vgpr_32 = GLOBAL_LOAD_SSHORT %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_SHORT %11, %43, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %44:vgpr_32 = GLOBAL_LOAD_USHORT %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_SHORT %11, %44, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %45:vgpr_32 = GLOBAL_LOAD_UBYTE %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_BYTE %11, %45, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %46:vgpr_32 = GLOBAL_LOAD_SBYTE %11, 16, 0, 0, 0, implicit $exec :: (load 4) + GLOBAL_STORE_BYTE %11, %46, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %47:vgpr_32 = GLOBAL_LOAD_SBYTE_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4) + GLOBAL_STORE_BYTE_D16_HI %11, %47, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %48:vgpr_32 = GLOBAL_LOAD_UBYTE_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4) + GLOBAL_STORE_BYTE_D16_HI %11, %48, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %49:vgpr_32 = GLOBAL_LOAD_SBYTE_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4) + GLOBAL_STORE_BYTE_D16_HI %11, %49, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %50:vgpr_32 = GLOBAL_LOAD_UBYTE_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4) + GLOBAL_STORE_BYTE_D16_HI %11, %50, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %51:vgpr_32 = GLOBAL_LOAD_SHORT_D16_HI %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4) + GLOBAL_STORE_SHORT_D16_HI %11, %51, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + %52:vgpr_32 = GLOBAL_LOAD_SHORT_D16 %11, 16, 0, 0, 0, %46, implicit $exec :: (load 4) + GLOBAL_STORE_SHORT_D16_HI %11, %52, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 
addrspace(1)* undef`, addrspace 1) %53:vgpr_32 = GLOBAL_ATOMIC_XOR_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %53, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %53, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_XOR %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %54:vgpr_32 = GLOBAL_ATOMIC_SMIN_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %54, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %54, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SMIN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %55:vgpr_32 = GLOBAL_ATOMIC_AND_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %55, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %55, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_AND %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %56:vgpr_32 = GLOBAL_ATOMIC_SWAP_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %56, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %56, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SWAP %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %57:vgpr_32 = GLOBAL_ATOMIC_SMAX_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %57, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %57, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SMAX %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %58:vgpr_32 = GLOBAL_ATOMIC_UMIN_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %58, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %58, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_UMIN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %59:vgpr_32 = GLOBAL_ATOMIC_UMAX_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %59, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %59, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_UMAX %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %60:vgpr_32 = GLOBAL_ATOMIC_OR_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %60, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %60, 0, 0, 0, 0, implicit $exec :: 
(volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_OR %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %61:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %61, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %61, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_ADD %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %62:vgpr_32 = GLOBAL_ATOMIC_SUB_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %62, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %62, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SUB %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %63:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %63, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %63, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_CMPSWAP %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %64:vgpr_32 = GLOBAL_ATOMIC_INC_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %64, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %64, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_INC %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %65:vgpr_32 = GLOBAL_ATOMIC_DEC_RTN %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORD %11, %65, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORD %11, %65, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_DEC %11, %15, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %66:vreg_64 = GLOBAL_ATOMIC_OR_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %66, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %66, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_OR_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %67:vreg_64 = GLOBAL_ATOMIC_XOR_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %67, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %67, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_XOR_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %68:vreg_64 = GLOBAL_ATOMIC_AND_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %68, 0, 
0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %68, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_AND_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %69:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %69, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %69, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_ADD_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %70:vreg_64 = GLOBAL_ATOMIC_SUB_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %70, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %70, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SUB_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %71:vreg_64 = GLOBAL_ATOMIC_DEC_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %71, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %71, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_DEC_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %72:vreg_64 = GLOBAL_ATOMIC_INC_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %72, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %72, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_INC_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %73:vreg_64 = GLOBAL_ATOMIC_SMIN_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %73, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %73, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SMIN_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %74:vreg_64 = GLOBAL_ATOMIC_SWAP_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %74, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %74, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SWAP_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %75:vreg_64 = GLOBAL_ATOMIC_SMAX_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %75, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %75, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_SMAX_X2 %11, %16, 16, 0, implicit $exec 
:: (volatile load store seq_cst 4, addrspace 1) %76:vreg_64 = GLOBAL_ATOMIC_UMIN_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %76, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %76, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_UMIN_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %77:vreg_64 = GLOBAL_ATOMIC_UMAX_X2_RTN %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %77, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_UMAX_X2 %11, %16, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) %79:sreg_128 = REG_SEQUENCE %4, %subreg.sub0, %4, %subreg.sub1, %4, %subreg.sub2, %4, %subreg.sub3 %80:vreg_128 = COPY %79 %78:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) - GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) + GLOBAL_STORE_DWORDX2 %11, %78, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) GLOBAL_ATOMIC_CMPSWAP_X2 %11, %80, 16, 0, implicit $exec :: (volatile load store seq_cst 4, addrspace 1) S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir +++ llvm/trunk/test/CodeGen/AMDGPU/hazard-buffer-store-v-interp.mir @@ -12,7 +12,7 @@ bb.0.entry: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, implicit $exec $vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir +++ llvm/trunk/test/CodeGen/AMDGPU/hazard-inlineasm.mir @@ -16,7 +16,7 @@ body: | bb.0: - FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, 0, 0, implicit $exec, implicit $flat_scr INLINEASM &"v_mad_u64_u32 $0, $1, $2, $3, $4", 0, 2621450, def $vgpr26_vgpr27, 2818058, def dead $sgpr14_sgpr15, 589833, $sgpr12, 327689, killed $vgpr51, 2621449, $vgpr46_vgpr47 S_ENDPGM 0 ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir +++ llvm/trunk/test/CodeGen/AMDGPU/hazard-kill.mir @@ -21,7 +21,7 @@ liveins: $sgpr2, $sgpr3, $sgpr4 $sgpr6 = S_MOV_B32 killed $sgpr3 - renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0 + renamable $sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 16, 0, 0 $m0 = S_MOV_B32 killed renamable $sgpr4 dead renamable $sgpr0 = KILL undef renamable $sgpr2 renamable $vgpr0 = V_INTERP_MOV_F32 2, 0, 0, implicit $m0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ llvm/trunk/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -12,7 +12,7 @@ ; GCN: bb.0.entry: ; GCN: successors: %bb.1(0x80000000) ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 - ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4) ; GCN: renamable $sgpr2 = COPY renamable $sgpr1 ; GCN: renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 ; GCN: renamable $sgpr5 = S_MOV_B32 61440 @@ -101,7 +101,7 @@ ; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1 ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) ; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) ; GCN: S_ENDPGM entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 Index: llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir +++ llvm/trunk/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir @@ -49,10 +49,10 @@ bb.0 (%ir-block.2): $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) - $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 
0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) + $vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`) EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec $vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec $vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -230,28 +230,28 @@ body: | bb.0: - BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec - BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec $vgpr3 = V_MOV_B32_e32 0, implicit $exec S_BRANCH %bb.1 bb.1: - FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = V_MOV_B32_e32 0, implicit $exec - FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = V_MOV_B32_e32 0, implicit $exec - FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = V_MOV_B32_e32 0, implicit $exec FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr $vgpr3 = V_MOV_B32_e32 0, implicit $exec @@ -549,14 +549,14 @@ $flat_scr_lo = S_ADD_U32 $sgpr6, 
$sgpr9, implicit-def $scc $flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc DBG_VALUE $noreg, 2, !5, !11, debug-location !12 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5 $sgpr8 = S_MOV_B32 $sgpr5 $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec - BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4) + BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4) $sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5 $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec - BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr) + BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir +++ llvm/trunk/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -55,7 +55,7 @@ bb.0.entry: liveins: $sgpr0_sgpr1 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc @@ -64,7 +64,7 @@ liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 100, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 1, implicit $exec S_BRANCH %bb.3 @@ -72,7 +72,7 @@ liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 9, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 0, implicit $exec bb.3.done: @@ -80,7 +80,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) S_ENDPGM 0 ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir +++ llvm/trunk/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -57,15 +57,15 @@ %4.sub1 = COPY %3.sub0 undef %5.sub0 = COPY %4.sub1 %5.sub1 = COPY %4.sub0 - FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX2 $vgpr0_vgpr1, killed %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %6 = IMPLICIT_DEF undef %7.sub0_sub1 = COPY %6 %7.sub2 = COPY %3.sub0 - FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX3 $vgpr0_vgpr1, killed %7, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %8 = IMPLICIT_DEF undef %9.sub0_sub1_sub2 = COPY %8 %9.sub3 = COPY %3.sub0 - FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX4 $vgpr0_vgpr1, killed %9, 0, 0, 0, 0, implicit $exec, implicit $flat_scr ... Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s + +declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0 + +; GCN-LABEL: {{^}}test_s_get_waveid_in_workgroup: +; GFX10: global_store_dword +; GFX10: s_get_waveid_in_workgroup [[DEST:s[0-9]+]] +; GFX10: s_waitcnt lgkmcnt(0) +; GFX10: v_mov_b32_e32 [[VDEST:v[0-9]+]], [[DEST]] +; GFX10: global_store_dword v[{{[0-9:]+}}], [[VDEST]], off +define amdgpu_kernel void @test_s_get_waveid_in_workgroup(i32 addrspace(1)* %out) { +; Make sure %out is loaded and the associated wait count is already inserted + store i32 0, i32 addrspace(1)* %out + %v = call i32 @llvm.amdgcn.s.get.waveid.in.workgroup() + store i32 %v, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir @@ -80,13 +80,13 @@ successors: %bb.1.atomic(0x40000000), %bb.2.exit(0x40000000) liveins: $vgpr0, $sgpr0_sgpr1 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $vgpr1 = V_ASHRREV_I32_e32 31, $vgpr0, implicit $exec $vgpr1_vgpr2 = V_LSHL_B64 $vgpr0_vgpr1, 3, implicit $exec $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 0 S_WAITCNT 127 - $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep) + $vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep) $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -97,7 +97,7 @@ successors:
%bb.2.exit(0x80000000) liveins: $sgpr4_sgpr5_sgpr6_sgpr7:0x0000000C, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr1_vgpr2_vgpr3_vgpr4:0x00000003 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 15, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) dead $vgpr0 = V_MOV_B32_e32 -1, implicit $exec dead $vgpr0 = V_MOV_B32_e32 61440, implicit $exec $sgpr4_sgpr5 = S_MOV_B64 0 Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-invalid-addrspace.mir @@ -11,10 +11,10 @@ $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit $sgpr2_sgpr3, implicit $exec - renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`) + renamable $vgpr2 = FLAT_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(42)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -30,7 +30,7 @@ $vgpr2 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store syncscope("agent-one-as") seq_cst 4 into `i32 addrspace(42)* undef`) S_ENDPGM 0 ... 
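The FLAT and GLOBAL tests in this group follow the same pattern: the dlc bit is appended as the last immediate before the implicit operands. Note that the *_ATOMIC_* instructions in these files keep their previous operand count; only the non-atomic loads and stores pick up the new bit. A sketch, again with assumed operand labels rather than checked MIR syntax:

    ; FLAT_STORE_DWORD: vaddr, vdata, offset, glc, slc, dlc
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GLOBAL_LOAD_DWORD: vaddr, offset, glc, slc, dlc
    %10:vgpr_32 = GLOBAL_LOAD_DWORD %11, 16, 0, 0, 0, implicit $exec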
Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-local.mir @@ -13,14 +13,14 @@ name: load_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -37,14 +37,14 @@ name: load_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -61,14 +61,14 @@ name: load_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -85,14 +85,14 @@ name: load_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -109,14 +109,14 @@ name: load_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -133,14 +133,14 @@ name: load_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -157,14 +157,14 @@ name: load_wavefront_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -181,14 +181,14 @@ name: load_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -205,14 +205,14 @@ name: load_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -229,14 +229,14 @@ name: load_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -253,14 +253,14 @@ name: load_workgroup_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -277,14 +277,14 @@ name: load_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -301,14 +301,14 @@ name: load_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -325,14 +325,14 @@ name: load_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -349,14 +349,14 @@ name: load_agent_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -373,14 +373,14 @@ name: load_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -397,14 +397,14 @@ name: load_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -421,14 +421,14 @@ name: load_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -445,14 +445,14 @@ name: load_system_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -469,14 +469,14 @@ name: load_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(3)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
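All of the load tests above change in the same mechanical way: every scalar memory load gains one trailing immediate operand, and every flat store gains one as well. A minimal sketch of the pattern, assuming the appended `0` is the new DLC (device-level coherent) cache-policy bit that GFX10 adds after GLC:

# before: sbase, offset, glc
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)
# after: sbase, offset, glc, dlc -- the new operand defaults to 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4)

The store and atomicrmw hunks that follow apply the identical rewrite to their S_LOAD_DWORD_IMM lines.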
@@ -493,8 +493,8 @@ name: store_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -515,8 +515,8 @@ name: store_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -537,8 +537,8 @@ name: store_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -559,8 +559,8 @@ name: store_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -581,8 +581,8 @@ name: store_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant 
load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -603,8 +603,8 @@ name: store_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -625,8 +625,8 @@ name: store_wavefront_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -647,8 +647,8 @@ name: store_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -669,8 +669,8 @@ name: store_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -691,8 +691,8 @@ name: store_workgroup_monotonic body: | bb.0: - $sgpr2 = 
S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -713,8 +713,8 @@ name: store_workgroup_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -735,8 +735,8 @@ name: store_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -757,8 +757,8 @@ name: store_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -779,8 +779,8 @@ name: store_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed 
$sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -801,8 +801,8 @@ name: store_agent_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -823,8 +823,8 @@ name: store_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -845,8 +845,8 @@ name: store_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -867,8 +867,8 @@ name: store_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -889,8 +889,8 @@ name: store_system_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, 
addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -911,8 +911,8 @@ name: store_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -933,8 +933,8 @@ name: atomicrmw_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -955,8 +955,8 @@ name: atomicrmw_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -977,8 +977,8 @@ name: atomicrmw_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* 
undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -999,8 +999,8 @@ name: atomicrmw_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -1021,8 +1021,8 @@ name: atomicrmw_singlethread_acq_rel body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -1043,8 +1043,8 @@ name: atomicrmw_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-atomics.mir @@ -16,27 +16,27 @@ successors: %bb.1(0x30000000), %bb.2(0x50000000) liveins: $sgpr0_sgpr1, $sgpr3 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable 
invariant load 8 from `i64 addrspace(4)* undef`) $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`) S_CBRANCH_SCC0 %bb.1, implicit killed $scc bb.2: successors: %bb.3(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 32772, implicit $exec S_BRANCH %bb.3 @@ -45,7 +45,7 @@ successors: %bb.3(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 4, implicit $exec @@ -55,11 +55,11 @@ S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 - FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`) + FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`) S_ENDPGM 0 ... 
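The vector memory instructions in these multiple-mem-operand tests follow the same scheme: FLAT ops appear to go from three immediates (offset, glc, slc) to four, and MUBUF ops from four (offset, glc, slc, tfe) to five, with the extra `0` again sitting in the new trailing cache-policy slot. A sketch, under the same assumption that the added operand is the DLC bit:

# FLAT: vaddr, vdata, offset, glc, slc -> vaddr, vdata, offset, glc, slc, dlc
FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32 addrspace(1)* undef`)
# MUBUF: vaddr, srsrc, soffset, offset, glc, slc, tfe -> ..., tfe, dlc
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec

This matches the updated CHECK line in the nontemporal test below, where the pre-existing immediates (including glc=1, slc=1 for the nontemporal case) keep their positions and only the final 0 is new.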
Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir @@ -62,7 +62,7 @@ # CHECK-LABEL: name: multiple_mem_operands # CHECK-LABEL: bb.3.done: -# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0 +# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 1, 1, 0, 0 name: multiple_mem_operands alignment: 0 @@ -110,27 +110,27 @@ successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000) liveins: $sgpr0_sgpr1, $sgpr3 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12) S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc bb.2.else: successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 32772, implicit $exec S_BRANCH %bb.3.done @@ -139,7 +139,7 @@ successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 4, implicit $exec @@ -149,11 +149,11 @@ S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_I32_e32 killed 
$sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 - FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) + FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-2.mir @@ -42,7 +42,7 @@ # CHECK-LABEL: name: multiple_mem_operands # CHECK-LABEL: bb.3.done: -# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0 +# CHECK: BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0 name: multiple_mem_operands alignment: 0 @@ -90,27 +90,27 @@ successors: %bb.1.if(0x30000000), %bb.2.else(0x50000000) liveins: $sgpr0_sgpr1, $sgpr3 - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 44, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) $sgpr8 = S_MOV_B32 &SCRATCH_RSRC_DWORD0, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM $sgpr0_sgpr1, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $sgpr9 = S_MOV_B32 &SCRATCH_RSRC_DWORD1, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 $vgpr0 = V_MOV_B32_e32 1, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01) S_WAITCNT 127 S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 2, implicit $exec $vgpr1 = V_MOV_B32_e32 32772, implicit $exec - BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12) + BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12) S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc bb.2.else: successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = 
S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 52, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 32772, implicit $exec S_BRANCH %bb.3.done @@ -119,7 +119,7 @@ successors: %bb.3.done(0x80000000) liveins: $sgpr0_sgpr1, $sgpr4_sgpr5, $sgpr3, $sgpr8_sgpr9_sgpr10_sgpr11 - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 48, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 3855 $vgpr0 = V_MOV_B32_e32 4, implicit $exec @@ -129,11 +129,11 @@ S_WAITCNT 127 $sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc $vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec - $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) + $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr) $vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5 $vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec S_WAITCNT 3952 - FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) + FLAT_STORE_DWORD killed $vgpr1_vgpr2, killed $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.out) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-region.mir @@ -13,14 +13,14 @@ name: load_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 1, 0, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") unordered 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -37,14 +37,14 @@ name: load_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") monotonic 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -61,14 +61,14 @@ name: load_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") acquire 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -85,14 +85,14 @@ name: load_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("singlethread-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -109,14 +109,14 @@ name: load_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") unordered 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -133,14 +133,14 @@ name: load_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") monotonic 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -157,14 +157,14 @@ name: load_wavefront_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") acquire 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -181,14 +181,14 @@ name: load_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("wavefront-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -205,14 +205,14 @@ name: load_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") unordered 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -229,14 +229,14 @@ name: load_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") monotonic 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -253,14 +253,14 @@ name: load_workgroup_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") acquire 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -277,14 +277,14 @@ name: load_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("workgroup-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -301,14 +301,14 @@ name: load_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") unordered 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -325,14 +325,14 @@ name: load_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") monotonic 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -349,14 +349,14 @@ name: load_agent_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") acquire 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -373,14 +373,14 @@ name: load_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("agent-one-as") seq_cst 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -397,14 +397,14 @@ name: load_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") unordered 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -421,14 +421,14 @@ name: load_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") monotonic 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -445,14 +445,14 @@ name: load_system_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") acquire 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... 
@@ -469,14 +469,14 @@ name: load_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 44, 0, 0 :: (dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, align 4, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec renamable $vgpr2 = DS_READ_B32 killed renamable $vgpr0, 0, 1, implicit $m0, implicit $exec :: (volatile load syncscope("one-as") seq_cst 4 from `i32 addrspace(2)* undef`) $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec - FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) + FLAT_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into `i32* undef`) S_ENDPGM 0 ... @@ -493,8 +493,8 @@ name: store_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -515,8 +515,8 @@ name: store_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -537,8 +537,8 @@ name: store_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 
addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -559,8 +559,8 @@ name: store_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -581,8 +581,8 @@ name: store_wavefront_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -603,8 +603,8 @@ name: store_wavefront_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -625,8 +625,8 @@ name: store_wavefront_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -647,8 +647,8 @@ name: store_wavefront_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed 
$sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -669,8 +669,8 @@ name: store_workgroup_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -691,8 +691,8 @@ name: store_workgroup_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -713,8 +713,8 @@ name: store_workgroup_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -735,8 +735,8 @@ name: store_workgroup_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 
killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -757,13 +757,14 @@ name: store_agent_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec DS_WRITE_B32 killed renamable $vgpr0, killed renamable $vgpr1, 0, 1, implicit $m0, implicit $exec :: (volatile store syncscope("agent-one-as") unordered 4 into `i32 addrspace(2)* undef`) S_ENDPGM 0 + ... --- @@ -778,8 +779,8 @@ name: store_agent_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -800,8 +801,8 @@ name: store_agent_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -822,8 +823,8 @@ name: store_agent_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -844,8 +845,8 @@ name: store_system_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: 
(dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -866,8 +867,8 @@ name: store_system_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -888,8 +889,8 @@ name: store_system_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -910,8 +911,8 @@ name: store_system_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -932,8 +933,8 @@ name: atomicrmw_singlethread_unordered body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable 
invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -954,8 +955,8 @@ name: atomicrmw_singlethread_monotonic body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -976,8 +977,8 @@ name: atomicrmw_singlethread_acquire body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -998,8 +999,8 @@ name: atomicrmw_singlethread_release body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -1020,8 +1021,8 @@ name: atomicrmw_singlethread_acq_rel body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec @@ -1042,8 +1043,8 @@ name: atomicrmw_singlethread_seq_cst body: | bb.0: - $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 4 from `i32 
addrspace(4)* undef`, addrspace 4) - $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) + $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, addrspace 4) + $sgpr0 = S_LOAD_DWORD_IMM killed $sgpr0_sgpr1, 40, 0, 0 :: (dereferenceable invariant load 4 from `i32 addrspace(4)* undef`, align 8, addrspace 4) $m0 = S_MOV_B32 -1 $vgpr1 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit $exec $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir +++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.mir @@ -2,12 +2,12 @@ # GCN-LABEL: {{^}}name: vector_clause{{$}} # GCN: early-clobber %2:vreg_128, early-clobber %4:vreg_128, early-clobber %1:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec { -# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec -# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec -# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec -# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec +# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec +# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec # GCN-NEXT: } -# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec +# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec --- name: vector_clause @@ -21,24 +21,24 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec - %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec - %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec - %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, implicit $exec + %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec + %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec + %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec + %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: {{^}}name: subreg_full{{$}} # GCN: early-clobber %1:vreg_128 = BUNDLE %0, implicit $exec { -# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec -# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec -# GCN-NEXT: internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec -# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec +# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: internal %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec +# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec # GCN-NEXT: } -# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec +# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec --- name: subreg_full @@ -49,20 +49,20 @@ body: | bb.0: %0 = IMPLICIT_DEF - undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec - %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec - %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec - %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec + undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec + %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec + %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec + %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: {{^}}name: subreg_part{{$}} # GCN: undef early-clobber %1.sub0_sub1:vreg_128, undef early-clobber %1.sub3:vreg_128 = BUNDLE %0, implicit $exec { -# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec -# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec -# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec +# GCN-NEXT: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: internal %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: internal %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec # GCN-NEXT: } -# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec +# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec --- name: subreg_part @@ -73,18 +73,18 @@ body: | bb.0: %0 = IMPLICIT_DEF - undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, implicit $exec - %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, implicit $exec - %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, implicit $exec + undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, 0, 0, implicit $exec + %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, 0, 0, implicit $exec + %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: {{^}}name: dead{{$}} # GCN: dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128 = BUNDLE %0, implicit $exec { -# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec -# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec -# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec -# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec +# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec # GCN-NEXT: } --- @@ -99,18 +99,18 @@ body: | bb.0: %0 = IMPLICIT_DEF - dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec - dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec - dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec - dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec + dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec + dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec + dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec + dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec ... # GCN-LABEL: {{^}}name: subreg_dead{{$}} # GCN: early-clobber %1:vreg_64 = BUNDLE %0, implicit $exec { -# GCN-NEXT: %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec -# GCN-NEXT: dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec +# GCN-NEXT: %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec # GCN-NEXT: } -# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, implicit $exec +# GCN-NEXT: GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec --- name: subreg_dead @@ -121,15 +121,15 @@ body: | bb.0: %0 = IMPLICIT_DEF - undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec - dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec - GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, implicit $exec + undef %1.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec + dead %1.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %0, %1.sub0, 0, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: {{^}}name: kill{{$}} # GCN: early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %0, %1, implicit $exec { -# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec -# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec +# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec # GCN-NEXT: } --- @@ -144,17 +144,17 @@ bb.0: %0 = IMPLICIT_DEF %1 = IMPLICIT_DEF - %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec - %3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, implicit $exec + %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec + %3:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec ... # GCN-LABEL: {{^}}name: indirect{{$}} -# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, implicit $exec +# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec # GCN-NEXT: early-clobber %2:vreg_128, early-clobber %3:vreg_128 = BUNDLE %1, implicit $exec { -# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, implicit $exec -# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec +# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec # GCN-NEXT: } --- @@ -168,18 +168,18 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, implicit $exec - %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, implicit $exec - %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, implicit $exec + %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, implicit $exec + %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, implicit $exec + %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, 0, 0, implicit $exec ... # GCN-LABEL: {{^}}name: stack{{$}} # GCN: %0:vreg_64 = IMPLICIT_DEF -# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, implicit $exec -# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, implicit $exec -# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec +# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec --- name: stack @@ -193,33 +193,33 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, implicit $exec - %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, implicit $exec - GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, implicit $exec + %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, 0, 0, implicit $exec + %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: {{^}}name: overflow_counter{{$}} # GCN: dead early-clobber %7:vgpr_32, dead early-clobber %14:vgpr_32, dead early-clobber %2:vgpr_32, dead early-clobber %9:vgpr_32, dead early-clobber %4:vgpr_32, dead early-clobber %11:vgpr_32, dead early-clobber %6:vgpr_32, dead early-clobber %13:vgpr_32, dead early-clobber %1:vgpr_32, dead early-clobber %8:vgpr_32, dead early-clobber %15:vgpr_32, dead early-clobber %3:vgpr_32, dead early-clobber %10:vgpr_32, dead early-clobber %5:vgpr_32, dead early-clobber %12:vgpr_32 = BUNDLE %0, implicit $exec { -# GCN-NEXT: dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec -# GCN-NEXT: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, implicit $exec -# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, implicit $exec -# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, implicit $exec -# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec -# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, implicit $exec -# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, implicit $exec -# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, implicit $exec -# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec -# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, implicit $exec -# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, implicit $exec -# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, implicit $exec -# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, implicit $exec -# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, implicit $exec -# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, implicit $exec +# GCN-NEXT: dead %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec # GCN-NEXT: } # GCN-NEXT: dead early-clobber %16:vgpr_32, dead early-clobber %17:vgpr_32 = BUNDLE %0, implicit $exec { -# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, implicit $exec -# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, implicit $exec +# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec # GCN-NEXT: } --- @@ -247,36 +247,36 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec - %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 
4, 0, 0, implicit $exec - %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, implicit $exec - %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, implicit $exec - %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, implicit $exec - %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, implicit $exec - %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, implicit $exec - %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, implicit $exec - %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, implicit $exec - %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, implicit $exec - %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, implicit $exec - %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, implicit $exec - %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, implicit $exec - %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, implicit $exec - %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, implicit $exec - %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, implicit $exec - %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, implicit $exec + %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec + %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, 0, 0, implicit $exec + %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec + %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, 0, 0, implicit $exec + %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, 0, 0, implicit $exec + %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, 0, 0, implicit $exec + %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, 0, 0, implicit $exec + %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, 0, 0, implicit $exec + %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, 0, 0, implicit $exec + %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, 0, 0, implicit $exec + %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, 0, 0, implicit $exec + %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, 0, 0, implicit $exec + %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, 0, 0, implicit $exec + %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, 0, 0, implicit $exec + %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, 0, 0, implicit $exec + %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 60, 0, 0, 0, implicit $exec + %17:vgpr_32 = GLOBAL_LOAD_DWORD %0, 64, 0, 0, 0, implicit $exec ... 
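(Note: the test churn in this part of the patch is mechanical. Every FLAT and GLOBAL memory instruction picks up one extra trailing immediate for the new DLC cache-policy bit, and all existing tests pass 0 for it. A minimal before/after sketch; the operand labels in the comments are inferred from this diff, MIR itself does not print them:

  ; before: vaddr, offset, glc, slc
  %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, implicit $exec
  ; after:  vaddr, offset, glc, slc, dlc
  %1:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec
)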
# GCN-LABEL: {{^}}name: reg_pressure{{$}} # GCN: dead early-clobber %2:vreg_128, dead early-clobber %4:vreg_128, dead early-clobber %1:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vreg_128 = BUNDLE %0, implicit $exec { -# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec -# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec -# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec -# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec -# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, implicit $exec +# GCN-NEXT: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec # GCN-NEXT: } # GCN-NEXT: dead early-clobber %7:vreg_128, dead early-clobber %6:vreg_128 = BUNDLE %0, implicit $exec { -# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, implicit $exec -# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, implicit $exec +# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec # GCN-NEXT: } --- @@ -294,13 +294,13 @@ body: | bb.0: %0 = IMPLICIT_DEF - %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec - %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, implicit $exec - %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, implicit $exec - %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, implicit $exec - %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, implicit $exec - %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, implicit $exec - %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, implicit $exec + %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec + %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, 0, 0, implicit $exec + %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, 0, 0, implicit $exec + %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, 0, 0, implicit $exec + %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, 0, 0, implicit $exec + %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, 0, 0, implicit $exec + %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: {{^}}name: image_clause{{$}} @@ -336,8 +336,8 @@ # GCN-LABEL: {{^}}name: mixed_clause{{$}} # GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec { # GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec -# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec -# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec +# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec # GCN-NEXT: } --- @@ -356,8 +356,8 @@ %1 = IMPLICIT_DEF %2 = IMPLICIT_DEF %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec - %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, implicit $exec - %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, implicit $exec + %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec + %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: {{^}}name: atomic{{$}} Index: llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ llvm/trunk/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -23,7 +23,7 @@ # COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc # COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec -# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec +# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec # COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc # COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec # COMMON-LABEL bb.2: @@ -47,7 +47,7 @@ %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec + %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 @@ -69,7 +69,7 @@ # COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc # COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec -# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec +# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec # COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc # COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec # COMMON-LABEL bb.2: @@ -93,7 +93,7 @@ %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 
implicit $exec + %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 @@ -115,7 +115,7 @@ # COMMON: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # COMMON: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc # COMMON: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec -# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec +# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec # COMMON: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc # COMMON: S_CBRANCH_EXECNZ %bb.1, implicit $exec # COMMON-LABEL bb.2: @@ -139,7 +139,7 @@ %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, implicit $exec + %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 @@ -155,7 +155,7 @@ # COMMON: %9:vgpr_32 = V_ADD_I32_e32 %12.sub0, %4.sub0, implicit-def $vcc, implicit $exec # COMMON: %10:vgpr_32 = V_ADDC_U32_e32 %12.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec # COMMON: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1 -# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, implicit $exec +# COMMON: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec --- name: addr64 liveins: @@ -175,7 +175,7 @@ %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, implicit $exec + %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 @@ -198,7 +198,7 @@ # NO-ADDR64: [[CMP1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec # NO-ADDR64: [[CMP:%[0-9]+]]:sreg_64 = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc # NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec -# NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec +# NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec # NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc # NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec # NO-ADDR64-LABEL bb.2: @@ -211,7 +211,7 @@ # ADDR64: [[RSRCFMTHI:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 # ADDR64: [[ZERORSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, [[RSRCFMTHI]], %subreg.sub3 # ADDR64: [[VADDR64:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[RSRCPTR]].sub0, %subreg.sub0, [[RSRCPTR]].sub1, %subreg.sub1 -# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, implicit $exec +# ADDR64: {{[0-9]+}}:vgpr_32 = 
BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, implicit $exec --- name: offset @@ -232,7 +232,7 @@ %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 %6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, implicit $exec + %7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 $vgpr0 = COPY %7 S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 Index: llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir +++ llvm/trunk/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir @@ -105,8 +105,8 @@ %3 = COPY $sgpr0_sgpr1 %2 = COPY $vgpr0 - %7 = S_LOAD_DWORDX2_IMM %3, 9, 0 - %8 = S_LOAD_DWORDX2_IMM %3, 11, 0 + %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0 + %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0 %6 = COPY %7 %9 = S_MOV_B32 0 %10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1 @@ -137,7 +137,7 @@ %28 = REG_SEQUENCE %6, 17, killed %27, 18 %29 = V_MOV_B32_e32 0, implicit $exec %30 = COPY %24 - BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.bb2: SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -203,9 +203,9 @@ %3 = COPY $sgpr0_sgpr1 %2 = COPY $vgpr0 - %7 = S_LOAD_DWORDX2_IMM %3, 9, 0 - %8 = S_LOAD_DWORDX2_IMM %3, 11, 0 - %9 = S_LOAD_DWORDX2_IMM %3, 13, 0 + %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0 + %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0 + %9 = S_LOAD_DWORDX2_IMM %3, 13, 0, 0 %6 = COPY %7 %10 = S_MOV_B32 0 %11 = REG_SEQUENCE %2, %subreg.sub0, killed %10, %subreg.sub1 @@ -243,7 +243,7 @@ %37 = REG_SEQUENCE %6, 17, killed %36, 18 %38 = V_MOV_B32_e32 0, implicit $exec %39 = COPY %33 - BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.bb2: SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec @@ -300,8 +300,8 @@ %3 = COPY $sgpr0_sgpr1 %2 = COPY $vgpr0 - %7 = S_LOAD_DWORDX2_IMM %3, 9, 0 - %8 = S_LOAD_DWORDX2_IMM %3, 11, 0 + %7 = S_LOAD_DWORDX2_IMM %3, 9, 0, 0 + %8 = S_LOAD_DWORDX2_IMM %3, 11, 0, 0 %6 = COPY %7 %9 = S_MOV_B32 0 %10 = REG_SEQUENCE %2, %subreg.sub0, killed %9, %subreg.sub1 @@ -332,7 +332,7 @@ %28 = REG_SEQUENCE %6, 17, killed %27, 18 %29 = V_MOV_B32_e32 0, implicit $exec %30 = COPY %24 - BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.bb2: SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ llvm/trunk/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -151,7 +151,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -159,7 +159,7 @@ $exec = S_OR_B64 $exec, killed 
$sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -188,7 +188,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -196,7 +196,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -225,7 +225,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -233,14 +233,14 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... --- # CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle # CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc -# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec +# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec # CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc # CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3 # CHECK-NEXT: SI_MASK_BRANCH @@ -255,7 +255,7 @@ $vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec $vgpr0 = V_MOV_B32_e32 4, implicit $exec $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc - BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc $exec = S_MOV_B64_term killed $sgpr2_sgpr3 SI_MASK_BRANCH %bb.2, implicit $exec @@ -266,7 +266,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -274,7 +274,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -304,7 +304,7 @@ bb.1.if: liveins: $sgpr0_sgpr1 , $sgpr4_sgpr5_sgpr6_sgpr7 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7 @@ -312,7 +312,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -346,7 +346,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -356,7 +356,7 @@ $sgpr1 = S_MOV_B32 1 $sgpr2 = S_MOV_B32 -1 $sgpr3 = S_MOV_B32 61440 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -387,7 +387,7 @@ S_SLEEP 0, implicit $sgpr2_sgpr3 $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -395,7 +395,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -426,7 +426,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -434,7 +434,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -463,7 +463,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -471,7 +471,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
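(Note: for MUBUF instructions the new bit is appended at the end of the existing immediate run, so it grows from five to six zeros in the _OFFSET forms here, e.g.:

  ; before: srsrc, soffset, offset, glc, slc, tfe
  $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
  ; after:  srsrc, soffset, offset, glc, slc, tfe, dlc
  $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec

Again, the operand labels are inferred from this diff rather than printed by MIR.)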
@@ -500,7 +500,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -508,7 +508,7 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -539,7 +539,7 @@ $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 - $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2.end: liveins: $vgpr0, $sgpr0_sgpr1 @@ -547,6 +547,6 @@ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir +++ llvm/trunk/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir @@ -27,12 +27,12 @@ ; CHECK: liveins: $sgpr4, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr5 = COPY $sgpr4 ; CHECK: $sgpr6 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc - ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5) + ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5) ; CHECK: S_BRANCH %bb.1 ; CHECK: bb.1: ; CHECK: liveins: $sgpr5, $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr4 = S_ADD_U32 $sgpr5, 524288, implicit-def $scc - ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5) + ; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5) ; CHECK: S_ENDPGM 0, implicit $vgpr0 bb.0: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) Index: llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir +++ llvm/trunk/test/CodeGen/AMDGPU/promote-constOffset-to-imm.mir @@ -8,7 +8,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 @@ -34,12 +34,12 @@ %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 - %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 
implicit $exec + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6144 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 - %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec + %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec ... --- @@ -61,7 +61,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 @@ -87,17 +87,17 @@ %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 - %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 6400 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 - %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec + %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 11200 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 - %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec + %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec ... --- @@ -114,7 +114,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 @@ -140,17 +140,17 @@ %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 - %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec %32:sgpr_32 = S_MOV_B32 8192 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1 - %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec + %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, 0, implicit $exec %39:sgpr_32 = S_MOV_B32 10240 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1 - %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec + %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, 0, implicit $exec ... 
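(Note: SMEM loads follow the same pattern with a shorter operand list; the sketch below labels the operands as inferred from this diff:

  ; before: sbase, offset, glc
  %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
  ; after:  sbase, offset, glc, dlc
  %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
)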
--- @@ -159,7 +159,7 @@ body: | bb.0.entry: %0:sgpr_64 = COPY $sgpr0_sgpr1 - %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 %3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 %4:sreg_32_xm0 = COPY $sgpr101 %5:sreg_32_xm0 = S_MOV_B32 0 @@ -186,5 +186,5 @@ %26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1 - %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec + %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, 0, implicit $exec ... Index: llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir +++ llvm/trunk/test/CodeGen/AMDGPU/readlane_exec0.mir @@ -22,7 +22,7 @@ $sgpr10 = V_READFIRSTLANE_B32 $vgpr2, implicit $exec $sgpr11 = V_READFIRSTLANE_B32 $vgpr3, implicit $exec - $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0 + $sgpr10 = S_LOAD_DWORD_IMM killed $sgpr10_sgpr11, 0, 0, 0 S_WAITCNT 127 $vgpr0 = V_XOR_B32_e32 killed $sgpr10, killed $vgpr0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir +++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join-seg.mir @@ -185,9 +185,9 @@ bb.28: %9 = S_FF1_I32_B32 undef %10 %13 = V_MAD_U32_U24 killed %9, 48, 32, 0, implicit $exec - %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) + %45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4) %46 = V_AND_B32_e32 1, killed %45, implicit $exec - %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4) + %21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0, 0 :: (dereferenceable invariant load 4) %25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec %26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec %62 = IMPLICIT_DEF Index: llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir +++ llvm/trunk/test/CodeGen/AMDGPU/regcoal-subrange-join.mir @@ -96,14 +96,14 @@ %0.sub0 = COPY killed %12 %21 = COPY killed %18 %21.sub0 = COPY killed %15 - %22 = S_LOAD_DWORD_IMM killed %21, 2, 0 + %22 = S_LOAD_DWORD_IMM killed %21, 2, 0, 0 %23 = S_MOV_B32 491436 undef %24.sub0 = COPY killed %22 %24.sub1 = COPY killed %23 - %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0 + %25 = S_LOAD_DWORDX4_IMM killed %24, 0, 0, 0 %1 = COPY killed %25 - %26 = S_LOAD_DWORDX2_IMM %0, 2, 0 - dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0 + %26 = S_LOAD_DWORDX2_IMM %0, 2, 0, 0 + dead %27 = S_LOAD_DWORD_IMM killed %26, 0, 0, 0 S_CBRANCH_SCC0 %bb.1, implicit undef $scc bb.5: @@ -129,8 +129,8 @@ bb.2: %4 = COPY killed %59 %3 = COPY killed %58 - %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0 - %40 = S_LOAD_DWORD_IMM killed %39, 0, 0 + %39 = S_LOAD_DWORDX2_IMM killed %0, 6, 0, 0 + %40 = S_LOAD_DWORD_IMM killed %39, 0, 0, 0 %43 = V_MOV_B32_e32 -1102263091, implicit $exec %60 = COPY killed %4 %61 = COPY killed %3 Index: llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir 
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir +++ llvm/trunk/test/CodeGen/AMDGPU/regcoalesce-dbg.mir @@ -56,8 +56,8 @@ %3 = COPY killed $vgpr0 %0 = COPY killed $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %5 = S_LOAD_DWORD_IMM killed %0, 13, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %5 = S_LOAD_DWORD_IMM killed %0, 13, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) %18 = V_ASHRREV_I32_e32 31, %3, implicit $exec undef %19.sub0 = COPY killed %3 %19.sub1 = COPY killed %18 @@ -70,7 +70,7 @@ %13.sub2_sub3 = COPY killed %12 %20 = V_LSHL_B64 killed %19, 2, implicit $exec %16 = COPY killed %5 - BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir +++ llvm/trunk/test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir @@ -16,7 +16,7 @@ %23:vgpr_32 = V_CVT_U32_F32_e32 killed %21, implicit $exec %108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec undef %109.sub1:vreg_128 = COPY %108 - %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0 :: (dereferenceable invariant load 4) + %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0, 0 :: (dereferenceable invariant load 4) S_CMP_EQ_U32 killed %28, 0, implicit-def $scc S_CBRANCH_SCC0 %bb.2, implicit killed $scc @@ -47,7 +47,7 @@ S_BRANCH %bb.6 bb.6: - %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0 :: (dereferenceable invariant load 4) + %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0, 0 :: (dereferenceable invariant load 4) %39:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec %40:vgpr_32 = V_MAD_F32 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec %41:vgpr_32 = V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $exec @@ -83,7 +83,7 @@ S_BRANCH %bb.8 bb.8: - dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0 :: (dereferenceable invariant load 4) + dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0, 0 :: (dereferenceable invariant load 4) %138:vreg_128 = COPY killed %111 bb.9: Index: llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir +++ llvm/trunk/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir @@ -63,12 +63,12 @@ bb.3: %1 = COPY killed %17 - FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %14 = COPY %1.sub1 %16 = COPY killed %1.sub0 undef %15.sub0 = COPY 
killed %16 %15.sub1 = COPY killed %14 - FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... @@ -134,10 +134,10 @@ %6.sub2 = COPY %6.sub0 bb.2: - BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec $sgpr30_sgpr31 = COPY %5 S_SETPC_B64_return $sgpr30_sgpr31 Index: llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir +++ llvm/trunk/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir @@ -58,7 +58,7 @@ body: | bb.0: - S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0 S_ENDPGM 0 ... --- @@ -76,7 +76,7 @@ body: | bb.0: - S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0 S_DCACHE_WB S_ENDPGM 0 ... @@ -97,7 +97,7 @@ body: | bb.0: S_DCACHE_WB - S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0 S_ENDPGM 0 ... --- @@ -132,11 +132,11 @@ body: | bb.0: - S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0 S_ENDPGM 0 bb.1: - S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0 S_ENDPGM 0 ... ... @@ -164,7 +164,7 @@ S_ENDPGM 0 bb.1: - S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr4, undef $sgpr6_sgpr7, undef $m0, 0, 0 S_ENDPGM 0 ... --- @@ -182,6 +182,6 @@ body: | bb.0: - S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0 + S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0 SI_RETURN_TO_EPILOG undef $vgpr0 ... 
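(Note: scalar stores are updated symmetrically with the scalar loads, e.g.:

  ; before
  S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0
  ; after, with the extra trailing immediate
  S_STORE_DWORD_SGPR undef $sgpr2, undef $sgpr0_sgpr1, undef $m0, 0, 0

so every SMEM access, load or store, carries the same added operand.)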
Index: llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sched-assert-onlydbg-value-empty-region.mir @@ -23,8 +23,8 @@ ; CHECK: liveins: $vgpr0 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, implicit $exec - ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF]], 0, 0, 0, 0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 8, 0, 0, 0, implicit $exec ; CHECK: undef %4.sub1:vreg_64 = V_ADD_U32_e32 [[COPY]], [[COPY]], implicit $exec ; CHECK: %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec ; CHECK: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF @@ -40,19 +40,19 @@ ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_]] ; CHECK: undef %6.sub0:vreg_64 = V_ADD_F32_e32 [[DEF]].sub0, [[COPY1]].sub0, implicit $exec ; CHECK: dead undef %6.sub1:vreg_64 = V_ADD_F32_e32 [[DEF]].sub1, [[COPY1]].sub0, implicit $exec - ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, implicit $exec + ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec ; CHECK: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: undef %19.sub0:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD1]], [[GLOBAL_LOAD_DWORDX2_]].sub0, implicit $exec ; CHECK: %19.sub1:vreg_64 = V_ADD_F32_e32 [[GLOBAL_LOAD_DWORD]], [[GLOBAL_LOAD_DWORD]], implicit $exec - ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec - ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, implicit $exec - ; CHECK: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, implicit $exec - ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec - ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, implicit $exec + ; CHECK: GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec + ; CHECK: %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF1]], 0, 0, 0, 0, implicit $exec + ; CHECK: [[DEF2]].sub0:vreg_64 = GLOBAL_LOAD_DWORD [[DEF3]], 0, 0, 0, 0, implicit $exec + ; CHECK: dead %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, implicit $exec + ; CHECK: dead %21:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF4]], 0, 0, 0, 0, implicit $exec ; CHECK: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 2, [[DEF2]], implicit $exec - ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, implicit $exec + ; CHECK: dead %22:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF5]], 0, 0, 0, 0, implicit $exec ; CHECK: S_NOP 0, implicit [[DEF7]], implicit [[V_LSHLREV_B64_]].sub0, implicit [[DEF6]], implicit [[V_MOV_B32_e32_]] - ; CHECK: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, implicit $exec + ; CHECK: GLOBAL_STORE_DWORD [[DEF5]], [[V_MOV_B32_e32_1]], 0, 0, 0, 0, implicit $exec ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) ; CHECK: S_SETREG_IMM32_B32 0, 1 @@ -69,14 +69,14 @@ %0:vgpr_32 = COPY $vgpr0 %1:vreg_64 = IMPLICIT_DEF - %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, implicit $exec - %3:vgpr_32 = GLOBAL_LOAD_DWORD %1, 8, 0, 0, implicit $exec + %2:vreg_64 = GLOBAL_LOAD_DWORDX2 %1, 0, 0, 0, 0, implicit $exec + %3:vgpr_32 = GLOBAL_LOAD_DWORD 
%1, 8, 0, 0, 0, implicit $exec undef %4.sub1:vreg_64 = V_ADD_U32_e32 %0, %0, implicit $exec %4.sub0:vreg_64 = V_MOV_B32_e32 111, implicit $exec %5:vreg_64 = COPY %2 undef %6.sub0:vreg_64 = V_ADD_F32_e32 %1.sub0, %5.sub0, implicit $exec %6.sub1:vreg_64 = V_ADD_F32_e32 %1.sub1, %5.sub0, implicit $exec - %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, implicit $exec + %7:vgpr_32 = GLOBAL_LOAD_DWORD %5, 0, 0, 0, 0, implicit $exec %8:vreg_64 = IMPLICIT_DEF %9:vreg_64 = IMPLICIT_DEF %10:vreg_64 = IMPLICIT_DEF @@ -90,15 +90,15 @@ %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec undef %19.sub0:vreg_64 = V_ADD_F32_e32 %7, %2.sub0, implicit $exec %19.sub1:vreg_64 = V_ADD_F32_e32 %3, %3, implicit $exec - GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, implicit $exec - %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, implicit $exec - %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, implicit $exec - %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, implicit $exec - %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, implicit $exec - %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORDX2 %19, %4, 32, 0, 0, 0, implicit $exec + %11.sub0:vreg_64 = GLOBAL_LOAD_DWORD %9, 0, 0, 0, 0, implicit $exec + %8.sub0:vreg_64 = GLOBAL_LOAD_DWORD %10, 0, 0, 0, 0, implicit $exec + %20:vgpr_32 = GLOBAL_LOAD_DWORD %11, 0, 0, 0, 0, implicit $exec + %21:vgpr_32 = GLOBAL_LOAD_DWORD %14, 0, 0, 0, 0, implicit $exec + %22:vgpr_32 = GLOBAL_LOAD_DWORD %15, 0, 0, 0, 0, implicit $exec %23:vreg_64 = V_LSHLREV_B64 2, %8, implicit $exec S_NOP 0, implicit %13, implicit %23.sub0, implicit %12, implicit %17 - GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD %15, %18, 0, 0, 0, 0, implicit $exec bb.1: S_SETREG_IMM32_B32 0, 1 Index: llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -200,12 +200,12 @@ %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 %0:vgpr_32 = COPY $vgpr0 - %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0 - %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0 - %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0 + %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0, 0 + %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0, 0 + %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0, 0 %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0, 16, implicit-def dead $scc %12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1 %13:vgpr_32 = V_MUL_LO_I32 0, %0, implicit $exec @@ -217,29 +217,29 @@ %19:sreg_32_xm0_xexec = IMPLICIT_DEF %20:vgpr_32 = V_ADD_I32_e32 %19, %0, implicit-def dead $vcc, implicit $exec %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32 %20, 12, %7, 0, implicit 
$exec - %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, implicit $exec + %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, 0, 0, implicit $exec %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32 %20, 48, %8, 0, implicit $exec %26:vreg_128 = IMPLICIT_DEF - undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0 + undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0, 0 %27.sub1:sreg_64_xexec = S_MOV_B32 0 %28:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0, %28.sub0, implicit-def $scc %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1, %28.sub1, implicit-def dead $scc, implicit killed $scc - undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0 + undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0, 0 %27.sub0:sreg_64_xexec = IMPLICIT_DEF %31:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0, implicit-def $scc %33:sgpr_32 = S_ADDC_U32 %5.sub1, %31.sub1, implicit-def dead $scc, implicit killed $scc %34:vgpr_32 = IMPLICIT_DEF %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32 %23, %34, 0, 0, implicit $exec - %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, implicit $exec + %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, 0, 0, implicit $exec undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec %38.sub0:vreg_64 = COPY %37.sub0 %39:vreg_64 = V_LSHLREV_B64 3, %38, implicit $exec undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_I32_e64 0, %39.sub0, 0, implicit $exec %42:vgpr_32 = COPY %33 %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec - %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34) + %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.tmp34) undef %45.sub1:vreg_64 = IMPLICIT_DEF %45.sub0:vreg_64 = COPY %37.sub1 %46:vreg_64 = V_LSHLREV_B64 3, %45, implicit $exec @@ -247,7 +247,7 @@ %49:vgpr_32 = COPY %33 %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec %51:vreg_64 = IMPLICIT_DEF - undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8) + undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, 0, 0, implicit $exec :: (load 4 from %ir.18 + 8) %52.sub1:vreg_64 = IMPLICIT_DEF %53:vreg_64 = V_LSHLREV_B64 3, %52, implicit $exec undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_I32_e64 0, %53.sub0, 0, implicit $exec @@ -258,31 +258,31 @@ %59:sreg_64 = IMPLICIT_DEF %60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def $scc %61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead $scc, implicit killed $scc - %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4) + %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, 0, 0, implicit $exec :: (load 8 from %ir.20, align 4) undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec %63.sub0:vreg_64 = COPY %62.sub0 %64:vreg_64 = IMPLICIT_DEF undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_I32_e64 %60, %64.sub0, 0, implicit $exec %67:vgpr_32 = COPY %61 %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec - %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58) + %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, 0, 0, implicit $exec :: (load 16 from %ir.tmp58) undef %70.sub1:vreg_64 = IMPLICIT_DEF %70.sub0:vreg_64 = IMPLICIT_DEF %71:vreg_64 = IMPLICIT_DEF undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_I32_e64 %60, %71.sub0, 0, 
implicit $exec %74:vgpr_32 = COPY %61 %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec - %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, implicit $exec + %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, 0, 0, implicit $exec %77:vgpr_32 = IMPLICIT_DEF %78:vgpr_32 = IMPLICIT_DEF %79:vgpr_32 = V_MUL_F32_e32 0, %77, implicit $exec %80:vgpr_32 = IMPLICIT_DEF %81:vgpr_32 = IMPLICIT_DEF %84:vgpr_32 = IMPLICIT_DEF - BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, implicit $exec - BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, 0, implicit $exec %85:vgpr_32 = IMPLICIT_DEF %86:vgpr_32 = IMPLICIT_DEF %87:vgpr_32 = IMPLICIT_DEF Index: llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir +++ llvm/trunk/test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -47,7 +47,7 @@ liveins: $sgpr4_sgpr5 %1 = COPY $sgpr4_sgpr5 - %5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + %5 = S_LOAD_DWORD_IMM %1, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) $m0 = S_MOV_B32 -1 %7 = COPY %5 %6 = DS_READ_B32 %7, 0, 0, implicit $m0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-gfx9.mir @@ -37,11 +37,11 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %12 = S_MOV_B32 123 %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec %11 = V_ADD_I32_e32 %12, killed %10, implicit-def $vcc, implicit $exec - FLAT_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -80,9 +80,9 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec %11 = V_TRUNC_F32_e64 0, killed %10, 1, 2, implicit-def $vcc, implicit $exec - FLAT_STORE_DWORD %0, %11, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %11, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 Index: 
llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-ops.mir @@ -29,19 +29,19 @@ %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %173:vgpr_32, %175:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %171, 0, implicit $exec %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -77,13 +77,13 @@ %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec %163:vgpr_32, %165:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %161, 0, implicit $exec %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -113,7 +113,7 @@ %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -143,7 +143,7 @@ %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... 
@@ -172,7 +172,7 @@ %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -201,7 +201,7 @@ %30:vreg_64 = COPY $sgpr0_sgpr1 %63:vgpr_32, %65:sreg_64_xexec = V_ADD_I32_e64 %30.sub0, %23, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -232,7 +232,7 @@ %30:vreg_64 = COPY $sgpr0_sgpr1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %30, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -263,7 +263,7 @@ %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -294,7 +294,7 @@ %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %31:vreg_64 = COPY $vcc %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -325,7 +325,7 @@ %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... @@ -356,7 +356,7 @@ %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1 %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %32, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) ... 
@@ -386,5 +386,5 @@ %31:vreg_64 = COPY killed $vcc %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1 - GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) + GLOBAL_STORE_DWORDX2_SADDR %31, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8) Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir @@ -89,7 +89,7 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -139,7 +139,7 @@ %100 = V_MOV_B32_e32 %48, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -256,7 +256,7 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -315,7 +315,7 @@ %100 = V_MOV_B32_e32 %60, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -401,7 +401,7 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %5 = S_MOV_B32 65535 %6 = S_MOV_B32 65535 @@ -442,6 +442,6 @@ %100 = V_MOV_B32_e32 $vcc_lo, implicit $exec - FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-preserve.mir @@ -36,8 +36,8 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %5 = V_AND_B32_e32 65535, %3, implicit $exec %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec @@ -51,7 +51,7 @@ %13 = V_OR_B32_e64 %10, %12, implicit $exec - FLAT_STORE_DWORD %0, %13, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 @@ -88,14 +88,14 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = 
COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec %10:sreg_32_xm0 = S_MOV_B32 255 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0) - FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) S_ENDPGM 0 ... @@ -131,14 +131,14 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) - %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec %10:sreg_32_xm0 = S_MOV_B32 65535 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0) - FLAT_STORE_DWORD %0, %17, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir @@ -203,7 +203,7 @@ liveins: $sgpr4_sgpr5 %4 = COPY $sgpr4_sgpr5 - %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %8 = S_MOV_B64 0 %7 = COPY %9 %30 = V_MOV_B32_e32 1, implicit $exec @@ -221,26 +221,26 @@ %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc %16 = REG_SEQUENCE %14, 1, %15, 2 %18 = COPY %16 - %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) + %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) %60 = V_BFE_U32 %17, 8, 8, implicit $exec %61 = V_LSHLREV_B32_e32 2, killed %60, implicit $exec %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %67 = REG_SEQUENCE %70, 1, killed %65, 2 - FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9) + FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9) %37 = S_ADD_U32 %14, 4, implicit-def $scc %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc %71 = COPY killed %37 %72 = COPY killed %38 %41 = REG_SEQUENCE killed %71, 1, killed %72, 2 - %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) %73 = V_BFE_U32 %40, 8, 8, implicit $exec 
%74 = V_LSHLREV_B32_e32 2, killed %73, implicit $exec %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %80 = REG_SEQUENCE %83, 1, killed %78, 2 - FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) + FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc %57 = REG_SEQUENCE %55, 1, killed %56, 2 @@ -365,7 +365,7 @@ liveins: $sgpr4_sgpr5 %4 = COPY $sgpr4_sgpr5 - %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + %9 = S_LOAD_DWORDX2_IMM %4, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) %8 = S_MOV_B64 0 %7 = COPY %9 %30 = V_MOV_B32_e32 1, implicit $exec @@ -384,26 +384,26 @@ %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead $scc, implicit $scc %16 = REG_SEQUENCE %14, 1, %15, 2 %18 = COPY %16 - %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) + %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.uglygep45) %60 = V_BFE_U32 %17, 8, 8, implicit $exec %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit $exec %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def $vcc, implicit $exec %66 = COPY %13 %65 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %67 = REG_SEQUENCE %70, 1, killed %65, 2 - FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9) + FLAT_STORE_DWORD %67, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp9) %37 = S_ADD_U32 %14, 4, implicit-def $scc %38 = S_ADDC_U32 %15, 0, implicit-def dead $scc, implicit $scc %71 = COPY killed %37 %72 = COPY killed %38 %41 = REG_SEQUENCE killed %71, 1, killed %72, 2 - %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.scevgep) %73 = V_BFE_U32 %40, 8, 8, implicit $exec %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit $exec %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def $vcc, implicit $exec %78 = V_ADDC_U32_e32 0, %66, implicit-def $vcc, implicit $vcc, implicit $exec %80 = REG_SEQUENCE %83, 1, killed %78, 2 - FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) + FLAT_STORE_DWORD %80, %30, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4 into %ir.tmp17) %55 = S_ADD_U32 %0.sub0, 8, implicit-def $scc %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead $scc, implicit $scc %57 = REG_SEQUENCE %55, 1, killed %56, 2 Index: llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir @@ -41,7 +41,7 @@ %2 = COPY $sgpr30_sgpr31 %1 = COPY $vgpr2_vgpr3 %0 = COPY $vgpr0_vgpr1 - %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) %12 = V_LSHRREV_B32_e64 16, %3, implicit $exec %13 = V_BCNT_U32_B32_e64 %3, killed %12, implicit-def $vcc, implicit $exec @@ -56,6 +56,6 @@ %19 = V_READLANE_B32 killed 
%18, 0, implicit-def $vcc, implicit $exec %20 = V_MOV_B32_e64 %19, implicit $exec - FLAT_STORE_DWORD %0, %20, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + FLAT_STORE_DWORD %0, %20, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) $sgpr30_sgpr31 = COPY %2 S_SETPC_B64_return $sgpr30_sgpr31 Index: llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir +++ llvm/trunk/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir @@ -85,7 +85,7 @@ bb.0: %0:sreg_32_xm0 = COPY $sgpr5 %1:vreg_64 = IMPLICIT_DEF - %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit $exec, implicit $flat_scr + %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr %3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 dead $sgpr30_sgpr31 = SI_CALL %3, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 Index: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir +++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir @@ -21,7 +21,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec - GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec ... @@ -46,7 +46,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec - GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec ... @@ -71,7 +71,7 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec - GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec ... @@ -96,6 +96,6 @@ %2 = IMPLICIT_DEF %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec - GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, 0, implicit $exec ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir +++ llvm/trunk/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir @@ -71,8 +71,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec %27 = REG_SEQUENCE %3, 1, %26, 2 %10 = S_MOV_B32 61440 @@ -81,11 +81,11 @@ %13 = REG_SEQUENCE killed %5, 17, %12, 18 %28 = V_LSHL_B64 killed %27, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 - %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec - %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec %29, %9 = V_ADD_I32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -155,8 +155,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec %27 = REG_SEQUENCE %3, 1, %26, 2 %10 = S_MOV_B32 61440 @@ -165,11 +165,11 @@ %13 = REG_SEQUENCE killed %5, 17, %12, 18 %28 = V_LSHL_B64 killed %27, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 - %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec - %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec %29, %9 = V_SUB_I32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -239,8 +239,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec %27 = REG_SEQUENCE %3, 1, %26, 2 %10 = S_MOV_B32 61440 @@ -249,11 +249,11 @@ %13 = REG_SEQUENCE killed %5, 17, %12, 18 %28 = V_LSHL_B64 killed %27, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 - %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec - %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec %29, %9 = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
@@ -322,8 +322,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec %27 = REG_SEQUENCE %3, 1, %26, 2 %10 = S_MOV_B32 61440 @@ -332,12 +332,12 @@ %13 = REG_SEQUENCE killed %5, 17, %12, 18 %28 = V_LSHL_B64 killed %27, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 - %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec - %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec %9 = S_MOV_B64 0 %29, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -407,8 +407,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec %27 = REG_SEQUENCE %3, 1, %26, 2 %10 = S_MOV_B32 61440 @@ -417,12 +417,12 @@ %13 = REG_SEQUENCE killed %5, 17, %12, 18 %28 = V_LSHL_B64 killed %27, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 - %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec - %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec $vcc = S_MOV_B64 0 %29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... @@ -492,8 +492,8 @@ %3 = COPY $vgpr0 %0 = COPY $sgpr0_sgpr1 - %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 - %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 + %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 + %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %26 = V_ASHRREV_I32_e32 31, %3, implicit $exec %27 = REG_SEQUENCE %3, 1, %26, 2 %10 = S_MOV_B32 61440 @@ -502,11 +502,11 @@ %13 = REG_SEQUENCE killed %5, 17, %12, 18 %28 = V_LSHL_B64 killed %27, 2, implicit $exec %16 = REG_SEQUENCE killed %4, 17, %12, 18 - %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec - %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec + %17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec + %19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec %29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, 0, implicit $exec %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec - BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir +++ llvm/trunk/test/CodeGen/AMDGPU/si-lower-control-flow.mir @@ -15,7 +15,7 @@ ; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc ; GCN: S_ENDPGM 0 %0:sgpr_64 = COPY $sgpr4_sgpr5 - %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 %2:sreg_32_xm0 = S_AND_B32 %1, 255, implicit-def $scc %3:sreg_32_xm0 = S_AND_B32 65535, %2, implicit-def $scc S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir +++ llvm/trunk/test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir @@ -36,8 +36,8 @@ %3.sub1:sgpr_128 = S_AND_B32 %2, 65535, implicit-def dead $scc %3.sub3:sgpr_128 = S_MOV_B32 151468 %3.sub2:sgpr_128 = S_MOV_B32 -1 - %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4) - %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8) + %7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4) + %8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0, 0 :: (dereferenceable invariant load 8) undef %9.sub0:vreg_128 = V_LSHL_ADD_U32 %6, 4, %4, implicit $exec %9.sub1:vreg_128 = V_LSHL_ADD_U32 %5, 4, %0, implicit $exec S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir +++ llvm/trunk/test/CodeGen/AMDGPU/smrd-fold-offset.mir @@ -19,7 +19,7 @@ %8:vgpr_32 = COPY %6 %7:vgpr_32 = V_ADD_I32_e32 %4, killed %8, implicit-def dead $vcc, implicit $exec %10:sreg_32 = COPY %7 - %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 + %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0 $vgpr0 = COPY %9 SI_RETURN_TO_EPILOG $vgpr0 ... 
@@ -43,7 +43,7 @@ %8:vgpr_32 = COPY %6 %7:vgpr_32 = V_ADD_U32_e32 %4, killed %8, implicit $exec %10:sreg_32 = COPY %7 - %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0 :: (dereferenceable invariant load 4) + %9:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR killed %5, killed %10, 0, 0 :: (dereferenceable invariant load 4) $vgpr0 = COPY %9 SI_RETURN_TO_EPILOG $vgpr0 Index: llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir +++ llvm/trunk/test/CodeGen/AMDGPU/stack-slot-color-sgpr-vgpr-spills.mir @@ -24,10 +24,10 @@ stackPtrOffsetReg: $sgpr32 body: | bb.0: - %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, implicit $flat_scr, implicit $exec - %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, implicit $flat_scr, implicit $exec + %0:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $flat_scr, implicit $exec + %2:vgpr_32 = FLAT_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, 0, 0, implicit $flat_scr, implicit $exec S_NOP 0, implicit %0 - %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0 - %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0 + %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0 + %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0 S_NOP 0, implicit %1 ... Index: llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir +++ llvm/trunk/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir @@ -165,12 +165,12 @@ %18:vgpr_32 = V_MAD_F32 0, %10.sub0, 0, target-flags(amdgpu-gotprel) 1073741824, 0, -1082130432, 0, 0, implicit $exec %19:vgpr_32 = V_MAD_F32 0, %12.sub0, 0, target-flags(amdgpu-gotprel) 0, 0, 0, 0, 0, implicit $exec - %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0 :: (dereferenceable invariant load 16) + %20:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %21:sreg_128, 1040, 0, 0 :: (dereferenceable invariant load 16) %22:vgpr_32 = V_ADD_F32_e32 0, %19, implicit $exec %23:vgpr_32 = V_MAD_F32 0, %18, 0, 0, 0, 0, 0, 0, implicit $exec %24:vgpr_32 = COPY %20.sub3 %25:vgpr_32 = V_MUL_F32_e64 0, target-flags(amdgpu-gotprel32-lo) 0, 0, %20.sub1, 0, 0, implicit $exec - %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0 :: (dereferenceable invariant load 16) + %26:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %27:sreg_128, 1056, 0, 0 :: (dereferenceable invariant load 16) %28:vgpr_32 = V_MAD_F32 0, %18, 0, %26.sub0, 0, 0, 0, 0, implicit $exec %29:vgpr_32 = V_ADD_F32_e32 %28, %19, implicit $exec %30:vgpr_32 = V_RCP_F32_e32 %29, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll +++ llvm/trunk/test/CodeGen/AMDGPU/syncscopes.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -stop-after=si-insert-skips < %s | FileCheck --check-prefix=GCN %s ; GCN-LABEL: name: syncscopes -; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out) -; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 
into %ir.workgroup_out) -; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) +; GCN: FLAT_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("agent") seq_cst 4 into %ir.agent_out) +; GCN: FLAT_STORE_DWORD killed renamable $vgpr4_vgpr5, killed renamable $vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) +; GCN: FLAT_STORE_DWORD killed renamable $vgpr7_vgpr8, killed renamable $vgpr6, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) define void @syncscopes( i32 %agent, i32* %agent_out, Index: llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir +++ llvm/trunk/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir @@ -44,7 +44,7 @@ liveins: $vgpr0, $sgpr4_sgpr5 $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec - $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) + $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec @@ -108,7 +108,7 @@ liveins: $vgpr0, $sgpr4_sgpr5 $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec - $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) + $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`) $vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ llvm/trunk/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -78,7 +78,7 @@ bb.0.entry: liveins: $sgpr0_sgpr1, $vcc - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $exec @@ -88,7 +88,7 @@ liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 9, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 0, implicit 
$exec S_BRANCH %bb.3 @@ -96,7 +96,7 @@ liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 100, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 1, implicit $exec bb.3.done: @@ -104,7 +104,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) S_ENDPGM 0 ... @@ -140,7 +140,7 @@ bb.0.entry: liveins: $sgpr0_sgpr1 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 S_CBRANCH_VCCZ %bb.1, implicit undef $vcc @@ -149,7 +149,7 @@ liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 9, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 0, implicit $exec S_BRANCH %bb.3 @@ -157,7 +157,7 @@ liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 $vgpr0 = V_MOV_B32_e32 100, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`) $vgpr0 = V_MOV_B32_e32 1, implicit $exec bb.3.done: @@ -165,7 +165,7 @@ $sgpr3 = S_MOV_B32 61440 $sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir +++ llvm/trunk/test/CodeGen/AMDGPU/vmem-vcc-hazard.mir @@ -19,7 +19,7 @@ $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec bb.1: - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: name: vmem_vcc_branch_to_next # GCN: bb.1: @@ -40,7 +40,7 @@ S_BRANCH %bb.1 bb.1: - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec ... 
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_too_far # GCN: bb.1: @@ -61,7 +61,7 @@ $sgpr0 = S_MOV_B32 0 bb.1: - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_nops # GCN: bb.1: @@ -78,7 +78,7 @@ S_NOP 4 bb.1: - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: name: vmem_vcc_branch_around # GCN: bb.2: @@ -107,7 +107,7 @@ S_NOP 0 bb.2: - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: name: vmem_vcc_branch_backedge # GCN: S_NOP @@ -123,7 +123,7 @@ $vgpr0 = IMPLICIT_DEF $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec bb.1: $vgpr0 = IMPLICIT_DEF @@ -156,7 +156,7 @@ $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec bb.2: - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec ... # GCN-LABEL: name: vmem_vcc_self_loop # GCN: S_NOP @@ -172,7 +172,7 @@ $vgpr0 = IMPLICIT_DEF $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec $vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec S_BRANCH %bb.0 ... @@ -198,7 +198,7 @@ successors: %bb.1 $sgpr0 = S_MOV_B32 0 - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec S_BRANCH %bb.1 ... @@ -224,7 +224,7 @@ successors: %bb.1 $sgpr0 = S_MOV_B32 0 - $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec $vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec S_BRANCH %bb.1 ... 
Index: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir +++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-back-edge-loop.mir @@ -13,8 +13,8 @@ $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2 $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2 - $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec $vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec @@ -23,7 +23,7 @@ bb.3: successors: %bb.1 - $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) bb.1: successors: %bb.5, %bb.2 @@ -43,7 +43,7 @@ bb.4: successors: %bb.3, %bb.1 - $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) + $vgpr5 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1) $vgpr4 = V_CVT_I32_F32_e32 $vgpr5, implicit $exec V_CMP_EQ_U32_e32 2, killed $vgpr4, implicit-def $vcc, implicit $exec $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc Index: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir +++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-irreducible.mir @@ -37,8 +37,8 @@ bb.2: successors: %bb.3 - renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 - renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0 + renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 + renamable $sgpr3 = S_BUFFER_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0 bb.3: successors: %bb.1, %bb.4 @@ -73,12 +73,12 @@ body: | bb.0: successors: %bb.1, %bb.2 - $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0 + $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0 S_CBRANCH_VCCZ %bb.2, implicit $vcc bb.1: successors: %bb.2 - BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec bb.2: successors: %bb.3, %bb.6 @@ -86,18 +86,18 @@ bb.3: successors: %bb.4, %bb.5 - BUFFER_STORE_DWORD_OFFEN_exact killed renamable 
$vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec S_CBRANCH_VCCNZ %bb.5, implicit $vcc bb.4: successors: %bb.5 - renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0 + renamable $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 64, 0, 0 renamable $vgpr2 = BUFFER_ATOMIC_ADD_OFFSET_RTN killed renamable $vgpr2, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, implicit $exec bb.5: successors: %bb.6 bb.6: - FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir +++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-loop-single-basic-block.mir @@ -15,11 +15,11 @@ bb.0: S_BRANCH %bb.1 bb.1: - GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, implicit $exec - $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, implicit $exec - $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, implicit $exec - GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, implicit $exec - $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD $vgpr7_vgpr8, $vgpr11, 0, 0, 0, 0, implicit $exec + $vgpr21 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec + $vgpr10 = GLOBAL_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec + GLOBAL_STORE_DWORD $vgpr14_vgpr15, $vgpr21, 0, 0, 0, 0, implicit $exec + $vgpr11 = GLOBAL_LOAD_DWORD $vgpr11_vgpr12, 0, 0, 0, 0, implicit $exec S_CBRANCH_SCC1 %bb.1, implicit $scc bb.2: S_ENDPGM 0 Index: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir +++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-preexisting.mir @@ -20,7 +20,7 @@ bb.0: liveins: $sgpr0, $sgpr1, $vgpr0 - renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0 + renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX8_IMM renamable $sgpr0_sgpr1, 480, 0, 0 renamable $vgpr13 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec S_WAITCNT -16257 renamable $vgpr0_vgpr1 = DS_READ2_B32 renamable $vgpr13, 0, 1, 0, implicit $m0, implicit $exec Index: llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir +++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir @@ -44,21 +44,21 @@ body: | bb.0: successors: %bb.1 - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4) - $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4) + $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec S_BRANCH %bb.1 bb.1: successors: %bb.2 - 
$vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr - $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec S_BRANCH %bb.2 bb.2: - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) - $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16) + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4) + $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16) $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec S_ENDPGM 0 ... @@ -79,11 +79,11 @@ body: | bb.0: successors: %bb.1 - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr bb.1: $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec - FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... --- @@ -106,15 +106,15 @@ body: | bb.0: successors: %bb.2 - $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr + $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_BRANCH %bb.2 bb.1: - FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 bb.2: $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec - FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir =================================================================== --- llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir +++ llvm/trunk/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir @@ -16,13 +16,13 @@ %0:sgpr_64 = COPY $sgpr0_sgpr1 %vreg123_1:vgpr_32 = COPY %11 %27:vreg_64 = REG_SEQUENCE %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1 - %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0 + %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 %vreg123_2:vgpr_32 = COPY %4 - %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0 + %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 %vreg123_3:vgpr_32 = COPY %5 %16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3 - BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
Index: llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir =================================================================== --- llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir +++ llvm/trunk/test/CodeGen/MIR/AMDGPU/parse-order-reserved-regs.mir @@ -11,7 +11,7 @@ # CHECK: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' # CHECK: scratchWaveOffsetReg: '$sgpr50' # CHECK: frameOffsetReg: '$sgpr50' -# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) +# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) name: reserve_correct_register tracksRegLiveness: true machineFunctionInfo: @@ -24,6 +24,6 @@ body: | bb.0: - renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir =================================================================== --- llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir +++ llvm/trunk/test/CodeGen/MIR/AMDGPU/syncscopes.mir @@ -42,9 +42,9 @@ !0 = !{i32 1} # GCN-LABEL: name: syncscopes -# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4) -# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4) -# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4) +# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out, addrspace 4) +# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out, addrspace 4) +# GCN: FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out, addrspace 4) ... 
--- name: syncscopes @@ -74,27 +74,27 @@ liveins: $sgpr4_sgpr5 S_WAITCNT 0 - $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) - $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) - $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) - $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 8, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr6 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr2_sgpr3 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 24, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr7 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 16, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) + $sgpr8 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 32, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(4)* undef`) S_WAITCNT 127 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr0_sgpr1 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 40, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`) $vgpr1 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec - FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out) + FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("agent") seq_cst 4 into %ir.agent_out) S_WAITCNT 112 $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr2_sgpr3 $vgpr1 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr2_sgpr3, implicit $sgpr2_sgpr3, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec - FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) + FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("workgroup") seq_cst 4 into %ir.workgroup_out) S_WAITCNT 112 $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5 $vgpr1 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $sgpr4_sgpr5, implicit $exec $vgpr2 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec - FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) + 
FLAT_STORE_DWORD killed $vgpr0_vgpr1, killed $vgpr2, 0, -1, 0, 0, implicit $exec, implicit $flat_scr :: (volatile non-temporal store syncscope("wavefront") seq_cst 4 into %ir.wavefront_out) S_ENDPGM 0 ... Index: llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir =================================================================== --- llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir +++ llvm/trunk/test/CodeGen/MIR/AMDGPU/target-index-operands.mir @@ -40,19 +40,19 @@ $sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start), implicit-def $scc, implicit-def $scc $sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc $sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc - $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0 + $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0 $sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc $sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc $sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc $sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc $sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc $sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc - $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0 + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0 $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... --- @@ -70,18 +70,18 @@ $sgpr2 = S_ADD_U32 $sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def $scc, implicit-def $scc $sgpr3 = S_ADDC_U32 $sgpr3, 0, implicit-def $scc, implicit $scc, implicit-def $scc, implicit $scc $sgpr4_sgpr5 = S_LSHR_B64 $sgpr2_sgpr3, 32, implicit-def dead $scc - $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0 + $sgpr6 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 11, 0, 0 $sgpr7 = S_ASHR_I32 $sgpr6, 31, implicit-def dead $scc $sgpr6_sgpr7 = S_LSHL_B64 $sgpr6_sgpr7, 2, implicit-def dead $scc $sgpr2 = S_ADD_U32 $sgpr2, @float_gv, implicit-def $scc $sgpr3 = S_ADDC_U32 $sgpr4, 0, implicit-def dead $scc, implicit $scc $sgpr4 = S_ADD_U32 $sgpr2, $sgpr6, implicit-def $scc $sgpr5 = S_ADDC_U32 $sgpr3, $sgpr7, implicit-def dead $scc, implicit $scc - $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0 - $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0 + $sgpr2 = S_LOAD_DWORD_IMM $sgpr4_sgpr5, 0, 0, 0 + $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 9, 0, 0 $sgpr7 = S_MOV_B32 61440 $sgpr6 = S_MOV_B32 -1 $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec - BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec S_ENDPGM 0 ... 
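All of the MIR test updates above encode one mechanical change: every FLAT/GLOBAL, MUBUF, and SMEM memory instruction gains one extra immediate operand for the new GFX10 dlc bit, appended after the existing cache-policy immediates and set to 0 throughout these tests. A minimal sketch of the before/after shape, with hypothetical register numbers and the operand naming assumed from the updated tests (vaddr, offset, glc, slc, dlc for FLAT loads), not taken from any one test above:

# Illustrative .mir fragment, not part of the patch.
name: dlc_operand_sketch
body: |
  bb.0:
    %0:vreg_64 = IMPLICIT_DEF
    ; before this patch the trailing immediates were offset, glc, slc:
    ;   %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    ; after it, one more immediate carries the dlc bit, defaulted to 0:
    %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
    S_ENDPGM 0

The MC tests below exercise the same bit through the new dlc assembler modifier; in the GFX10 encodings it appears as 0x10 in the second byte (bit 12 of the first dword), e.g. [0x04,0x00,0x33,...] for glc slc versus [0x04,0x10,0x33,...] for glc slc dlc.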
Index: llvm/trunk/test/MC/AMDGPU/flat-gfx10.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/flat-gfx10.s +++ llvm/trunk/test/MC/AMDGPU/flat-gfx10.s @@ -0,0 +1,119 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s + +flat_load_dword v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_dword v1, v[3:4] offset:-1 +// GFX10-ERR: error: invalid operand for instruction + +flat_load_dword v1, v[3:4] offset:2047 +// GFX10: encoding: [0xff,0x07,0x30,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_dword v1, v[3:4] offset:2048 +// GFX10-ERR: error: invalid operand for instruction + +flat_load_dword v1, v[3:4] offset:4 glc +// GFX10: encoding: [0x04,0x00,0x31,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_dword v1, v[3:4] offset:4 glc slc +// GFX10: encoding: [0x04,0x00,0x33,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_dword v1, v[3:4] offset:4 glc slc dlc +// GFX10: encoding: [0x04,0x10,0x33,0xdc,0x03,0x00,0x7d,0x01] + +flat_atomic_add v[3:4], v5 offset:8 slc +// GFX10: encoding: [0x08,0x00,0xca,0xdc,0x03,0x05,0x7d,0x00] + +flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 +// GFX10: encoding: [0xff,0x07,0xc4,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 slc +// GFX10: encoding: [0xff,0x07,0xc6,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v[1:2], v[3:4] +// GFX10: encoding: [0x00,0x00,0xc4,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v[1:2], v[3:4] slc +// GFX10: encoding: [0x00,0x00,0xc6,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v[1:2], v[3:4] offset:2047 glc +// GFX10-ERR: error: invalid operand for instruction + +flat_atomic_cmpswap v[1:2], v[3:4] glc +// GFX10-ERR: error: invalid operand for instruction + +flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc +// GFX10: encoding: [0xff,0x07,0xc5,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 glc slc +// GFX10: encoding: [0xff,0x07,0xc7,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[3:4] glc +// GFX10: encoding: [0x00,0x00,0xc5,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[3:4] glc slc +// GFX10: encoding: [0x00,0x00,0xc7,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[3:4] glc +// GFX10: encoding: [0x00,0x00,0xc5,0xdc,0x01,0x03,0x7d,0x00] + +flat_atomic_cmpswap v0, v[1:2], v[3:4] offset:2047 +// GFX10-ERR: error: too few operands for instruction + +flat_atomic_cmpswap v0, v[1:2], v[3:4] slc +// GFX10-ERR: error: invalid operand for instruction + +flat_atomic_swap v[3:4], v5 offset:16 +// GFX10: encoding: [0x10,0x00,0xc0,0xdc,0x03,0x05,0x7d,0x00] + +flat_store_dword v[3:4], v1 offset:16 +// GFX10: encoding: [0x10,0x00,0x70,0xdc,0x03,0x01,0x7d,0x00] + +flat_store_dword v[3:4], v1, off +// GFX10-ERR: error: invalid operand for instruction + +flat_store_dword v[3:4], v1, s[0:1] +// GFX10-ERR: error: invalid operand for instruction + +flat_store_dword v[3:4], v1, s0 +// GFX10-ERR: error: invalid operand for instruction + +flat_load_dword v1, v[3:4], off +// GFX10-ERR: error: invalid operand for instruction + +flat_load_dword v1, v[3:4], s[0:1] +// GFX10-ERR: error: invalid operand for instruction + +flat_load_dword v1, v[3:4], s0 +// GFX10-ERR: error: invalid operand for instruction + +flat_load_dword v1, v[3:4], exec_hi +// GFX10-ERR: error: invalid operand for instruction + 
+flat_store_dword v[3:4], v1, exec_hi +// GFX10-ERR: error: invalid operand for instruction + +flat_load_ubyte_d16 v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x80,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_ubyte_d16_hi v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x84,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_sbyte_d16 v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x88,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_sbyte_d16_hi v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x8c,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_short_d16 v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x90,0xdc,0x03,0x00,0x7d,0x01] + +flat_load_short_d16_hi v1, v[3:4] +// GFX10: encoding: [0x00,0x00,0x94,0xdc,0x03,0x00,0x7d,0x01] + +flat_store_byte_d16_hi v[3:4], v1 +// GFX10: encoding: [0x00,0x00,0x64,0xdc,0x03,0x01,0x7d,0x00] + +flat_store_short_d16_hi v[3:4], v1 +// GFX10: encoding: [0x00,0x00,0x6c,0xdc,0x03,0x01,0x7d,0x00] Index: llvm/trunk/test/MC/AMDGPU/flat-global.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/flat-global.s +++ llvm/trunk/test/MC/AMDGPU/flat-global.s @@ -2,364 +2,527 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR %s + global_load_ubyte v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x20,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte v1, v[3:4], off ; encoding: [0x00,0x80,0x40,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_ubyte v1, v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x20,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_sbyte v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x24,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte v1, v[3:4], off ; encoding: [0x00,0x80,0x44,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_sbyte v1, v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x24,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_ushort v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x28,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ushort v1, v[3:4], off ; encoding: [0x00,0x80,0x48,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_ushort v1, v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x28,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_sshort v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x2c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sshort v1, v[3:4], off ; encoding: [0x00,0x80,0x4c,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_sshort v1, v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x2c,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_dword v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01] // 
VI-ERR: instruction not supported on this GPU +global_load_dword v1, v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x30,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_dwordx2 v[1:2], v[3:4], off +// GFX10: encoding: [0x00,0x80,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx2 v[1:2], v[3:4], off ; encoding: [0x00,0x80,0x54,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_dwordx2 v[1:2], v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x34,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_dwordx3 v[1:3], v[3:4], off +// GFX10: encoding: [0x00,0x80,0x3c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx3 v[1:3], v[3:4], off ; encoding: [0x00,0x80,0x58,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_dwordx3 v[1:3], v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x3c,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_load_dwordx4 v[1:4], v[3:4], off +// GFX10: encoding: [0x00,0x80,0x38,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dwordx4 v[1:4], v[3:4], off ; encoding: [0x00,0x80,0x5c,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +global_load_dwordx4 v[1:4], v[3:4], off dlc +// GFX10: encoding: [0x00,0x90,0x38,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + // FIXME: VI error should be instruction nto supported global_load_dword v1, v[3:4], off offset:0 +// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: :41: error: not a valid operand. global_load_dword v1, v[3:4], off offset:4095 +// GFX10-ERR: error: invalid operand for instruction // GFX9: global_load_dword v1, v[3:4], off offset:4095 ; encoding: [0xff,0x8f,0x50,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: :41: error: not a valid operand. global_load_dword v1, v[3:4], off offset:-1 +// GFX10: encoding: [0xff,0x8f,0x30,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_dword v1, v[3:4], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: :41: error: not a valid operand. global_load_dword v1, v[3:4], off offset:-4096 +// GFX10-ERR: error: invalid operand for instruction // GFX9: global_load_dword v1, v[3:4], off offset:-4096 ; encoding: [0x00,0x90,0x50,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: :41: error: not a valid operand. global_load_dword v1, v[3:4], off offset:4096 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: :35: error: invalid operand for instruction // VI-ERR: :41: error: not a valid operand. global_load_dword v1, v[3:4] off, offset:-4097 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: :35: error: invalid operand for instruction // VI-ERR: :41: error: not a valid operand. 
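
Taken together, the offset checks above and the flat-gfx10.s ones earlier give the immediate-offset ranges per target: gfx9 global accepts a signed 13-bit offset (-4096..4095), gfx10 narrows global/scratch to signed 12 bits (-2048..2047), and plain flat on gfx10 takes an unsigned 11-bit offset (0..2047). A hedged helper capturing just those observed bounds (helper names are mine; isInt/isUInt come from llvm/Support/MathExtras.h):

  #include "llvm/Support/MathExtras.h"
  using namespace llvm;

  // Signed-segment (global/scratch) immediate offsets.
  bool isLegalSegmentOffset(int64_t Off, bool IsGFX10) {
    return IsGFX10 ? isInt<12>(Off)  // gfx10: -2048..2047
                   : isInt<13>(Off); // gfx9:  -4096..4095
  }

  // Plain flat_* immediate offsets on gfx10: 0..2047, no negative values.
  bool isLegalPlainFlatOffsetGFX10(int64_t Off) { return isUInt<11>(Off); }
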
global_store_byte v[3:4], v1, off +// GFX10: encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_byte v[3:4], v1, off ; encoding: [0x00,0x80,0x60,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +global_store_byte v[3:4], v1, off dlc +// GFX10: encoding: [0x00,0x90,0x60,0xdc,0x03,0x01,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_store_short v[3:4], v1, off +// GFX10: encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_short v[3:4], v1, off ; encoding: [0x00,0x80,0x68,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +global_store_short v[3:4], v1, off dlc +// GFX10: encoding: [0x00,0x90,0x68,0xdc,0x03,0x01,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_store_dword v[3:4], v1, off +// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +global_store_dword v[3:4], v1, off dlc +// GFX10: encoding: [0x00,0x90,0x70,0xdc,0x03,0x01,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_store_dwordx2 v[3:4], v[1:2], off +// GFX10: encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx2 v[3:4], v[1:2], off ; encoding: [0x00,0x80,0x74,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +global_store_dwordx2 v[3:4], v[1:2], off dlc +// GFX10: encoding: [0x00,0x90,0x74,0xdc,0x03,0x01,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_store_dwordx3 v[3:4], v[1:3], off +// GFX10: encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx3 v[3:4], v[1:3], off ; encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +global_store_dwordx3 v[3:4], v[1:3], off dlc +// GFX10: encoding: [0x00,0x90,0x7c,0xdc,0x03,0x01,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_store_dwordx4 v[3:4], v[1:4], off +// GFX10: encoding: [0x00,0x80,0x78,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dwordx4 v[3:4], v[1:4], off ; encoding: [0x00,0x80,0x7c,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +global_store_dwordx4 v[3:4], v[1:4], off dlc +// GFX10: encoding: [0x00,0x90,0x78,0xdc,0x03,0x01,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + global_store_dword v[3:4], v1, off offset:12 +// GFX10: encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_dword v[3:4], v1, off offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: :42: error: not a valid operand global_load_dword v1, v[3:4], s[2:3] +// GFX10: encoding: [0x00,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v[3:4], s[2:3] ; encoding: [0x00,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] // VI-ERR: instruction not supported on this GPU global_load_dword v1, v[3:4], s[2:3] offset:24 +// GFX10: encoding: [0x18,0x80,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v[3:4], s[2:3] offset:24 ; encoding: [0x18,0x80,0x50,0xdc,0x03,0x00,0x02,0x01] // VI-ERR: :44: error: not a valid operand. 
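
The s[2:3] and exec forms just above also show where the SADDR operand is encoded: byte 6 of the encoding holds the SGPR base (0x02 for s[2:3], 0x7e for exec), and the "off" placeholder is target-dependent, 0x7f on gfx9 versus 0x7d on gfx10 (gfx10's null-SGPR encoding). A rough helper naming the byte values seen in these vectors; the even-aligned pair naming is a simplification for illustration:

  #include <cstdint>
  #include <string>

  std::string describeSAddrByte(uint8_t B, bool IsGFX10) {
    if (B == (IsGFX10 ? 0x7d : 0x7f)) return "off";
    if (B == 0x7e) return "exec";
    return "s[" + std::to_string(B) + ":" + std::to_string(B + 1) + "]";
  }
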
global_load_dword v1, v[3:4], s[2:3] offset:-8 +// GFX10: encoding: [0xf8,0x8f,0x30,0xdc,0x03,0x00,0x02,0x01] // GFX9: global_load_dword v1, v[3:4], s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x50,0xdc,0x03,0x00,0x02,0x01] // VI-ERR: :44: error: not a valid operand. global_store_dword v[3:4], v1, s[2:3] +// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v[3:4], v1, s[2:3] ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // VI-ERR: instruction not supported on this GPU global_store_dword v[3:4], v1, s[2:3] offset:24 +// GFX10: encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v[3:4], v1, s[2:3] offset:24 ; encoding: [0x18,0x80,0x70,0xdc,0x03,0x01,0x02,0x00] // VI-ERR: :45: error: not a valid operand. global_store_dword v[3:4], v1, s[2:3] offset:-8 +// GFX10: encoding: [0xf8,0x8f,0x70,0xdc,0x03,0x01,0x02,0x00] // GFX9: global_store_dword v[3:4], v1, s[2:3] offset:-8 ; encoding: [0xf8,0x9f,0x70,0xdc,0x03,0x01,0x02,0x00] // VI-ERR: :45: error: not a valid operand. // XXX: Is this valid? global_store_dword v[3:4], v1, exec +// GFX10: encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00] // GFX9: global_store_dword v[3:4], v1, exec ; encoding: [0x00,0x80,0x70,0xdc,0x03,0x01,0x7e,0x00] // VI-ERR: instruction not supported on this GPU global_load_dword v1, v[3:4], s2 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: :31: error: invalid operand for instruction // VI-ERR: :31: error: invalid operand for instruction global_load_dword v1, v[3:4], exec_hi +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: :31: error: invalid operand for instruction // VI-ERR: :31: error: invalid operand for instruction global_atomic_cmpswap v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0xc4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_cmpswap v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x04,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: error: instruction not supported on this GPU global_atomic_cmpswap_x2 v[3:4], v[5:8], off +// GFX10: encoding: [0x00,0x80,0x44,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off ; encoding: [0x00,0x80,0x84,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: error: instruction not supported on this GPU global_atomic_swap v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xc0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_swap v[3:4], v5, off ; encoding: [0x00,0x80,0x00,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: error: instruction not supported on this GPU global_atomic_swap_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x40,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x80,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: error: instruction not supported on this GPU global_atomic_add v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xc8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add v[3:4], v5, off ; encoding: [0x00,0x80,0x08,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_sub v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xcc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub v[3:4], v5, off ; encoding: [0x00,0x80,0x0c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_smin v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xd4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin v[3:4], v5, off ; encoding: [0x00,0x80,0x10,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_umin v[3:4], v5, off +// GFX10: 
encoding: [0x00,0x80,0xd8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin v[3:4], v5, off ; encoding: [0x00,0x80,0x14,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_smax v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xdc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax v[3:4], v5, off ; encoding: [0x00,0x80,0x18,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_umax v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xe0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax v[3:4], v5, off ; encoding: [0x00,0x80,0x1c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_and v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xe4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and v[3:4], v5, off ; encoding: [0x00,0x80,0x20,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_or v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xe8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or v[3:4], v5, off ; encoding: [0x00,0x80,0x24,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_xor v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xec,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor v[3:4], v5, off ; encoding: [0x00,0x80,0x28,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_inc v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xf0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc v[3:4], v5, off ; encoding: [0x00,0x80,0x2c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_dec v[3:4], v5, off +// GFX10: encoding: [0x00,0x80,0xf4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec v[3:4], v5, off ; encoding: [0x00,0x80,0x30,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_add_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x48,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x88,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_sub_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x4c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x8c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_smin_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x54,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x90,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_umin_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x58,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x94,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_smax_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x5c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x98,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_umax_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x60,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0x9c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_and_x2 v[3:4], v[5:6], off +// GFX10: encoding: 
[0x00,0x80,0x64,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa0,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_or_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x68,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa4,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_xor_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x6c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xa8,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_inc_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x70,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xac,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_atomic_dec_x2 v[3:4], v[5:6], off +// GFX10: encoding: [0x00,0x80,0x74,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off ; encoding: [0x00,0x80,0xb0,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: error: instruction not supported on this GPU global_atomic_cmpswap v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xc4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_cmpswap v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x04,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :49: error: not a valid operand. global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x44,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8], off offset:-16 ; encoding: [0xf0,0x9f,0x84,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :52: error: not a valid operand. 
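
As the atomic checks above make visible, the FLAT opcode space was renumbered between gfx9 and gfx10, so every GFX10 line needs its own encoding rather than inheriting the GFX9 one. A few of the opcode-byte pairs, transcribed from the check lines (bytes 3 and 4 of the encoding, which carry the opcode field):

  #include <cstdint>

  struct AtomicOpc { const char *Name; uint8_t GFX9[2]; uint8_t GFX10[2]; };

  static const AtomicOpc RenumberedAtomics[] = {
      {"global_atomic_swap",    {0x00, 0xdd}, {0xc0, 0xdc}},
      {"global_atomic_cmpswap", {0x04, 0xdd}, {0xc4, 0xdc}},
      {"global_atomic_add",     {0x08, 0xdd}, {0xc8, 0xdc}},
      {"global_atomic_add_x2",  {0x88, 0xdd}, {0x48, 0xdd}},
  };
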
global_atomic_swap v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xc0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_swap v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x00,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :42: error: not a valid operand global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x40,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_swap_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x80,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :49: error: not a valid operand global_atomic_add v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xc8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x08,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :41: error: not a valid operand global_atomic_sub v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xcc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x0c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :41: error: not a valid operand global_atomic_smin v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xd4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x10,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :42: error: not a valid operand global_atomic_umin v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xd8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x14,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :42: error: not a valid operand global_atomic_smax v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xdc,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x18,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :42: error: not a valid operand global_atomic_umax v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xe0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x1c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :42: error: not a valid operand global_atomic_and v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xe4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x20,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :41: error: not a valid operand global_atomic_or v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xe8,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x24,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :40: error: not a valid operand global_atomic_xor v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xec,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x28,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :41: error: not a valid operand global_atomic_inc v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xf0,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x2c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :41: error: not a valid operand global_atomic_dec v[3:4], v5, off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0xf4,0xdc,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec v[3:4], v5, off offset:-16 ; encoding: [0xf0,0x9f,0x30,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :41: error: not a valid operand global_atomic_add_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x48,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_add_x2 v[3:4], v[5:6], 
off offset:-16 ; encoding: [0xf0,0x9f,0x88,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :48: error: not a valid operand global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x4c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_sub_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x8c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :48: error: not a valid operand global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x54,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x90,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :49: error: not a valid operand global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x58,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umin_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x94,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :49: error: not a valid operand global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x5c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_smax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x98,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :49: error: not a valid operand global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x60,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_umax_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0x9c,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :49: error: not a valid operand global_atomic_and_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x64,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_and_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa0,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :48: error: not a valid operand global_atomic_or_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x68,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_or_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa4,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :47: error: not a valid operand global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x6c,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_xor_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xa8,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :48: error: not a valid operand global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x70,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_inc_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xac,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :48: error: not a valid operand global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 +// GFX10: encoding: [0xf0,0x8f,0x74,0xdd,0x03,0x05,0x7d,0x00] // GFX9: global_atomic_dec_x2 v[3:4], v[5:6], off offset:-16 ; encoding: [0xf0,0x9f,0xb0,0xdd,0x03,0x05,0x7f,0x00] // VI-ERR: :48: error: not a valid operand global_load_ubyte_d16 v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x80,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU global_load_ubyte_d16_hi v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_ubyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x84,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU global_load_sbyte_d16 v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x88,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte_d16 v1, v[3:4], off ; encoding: 
[0x00,0x80,0x88,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU global_load_sbyte_d16_hi v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_sbyte_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x8c,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU global_load_short_d16 v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_short_d16 v1, v[3:4], off ; encoding: [0x00,0x80,0x90,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU global_load_short_d16_hi v1, v[3:4], off +// GFX10: encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7d,0x01] // GFX9: global_load_short_d16_hi v1, v[3:4], off ; encoding: [0x00,0x80,0x94,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU global_store_byte_d16_hi v[3:4], v1, off +// GFX10: encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_byte_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x64,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU global_store_short_d16_hi v[3:4], v1, off +// GFX10: encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7d,0x00] // GFX9: global_store_short_d16_hi v[3:4], v1, off ; encoding: [0x00,0x80,0x6c,0xdc,0x03,0x01,0x7f,0x00] // VI-ERR: instruction not supported on this GPU Index: llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s +++ llvm/trunk/test/MC/AMDGPU/flat-scratch-instructions.s @@ -2,176 +2,291 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding 2>&1 %s | FileCheck -check-prefix=GFX9-ERR -check-prefix=GCNERR %s // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding 2>&1 %s | FileCheck -check-prefix=VI-ERR -check-prefix=GCNERR %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck --check-prefixes=GFX10,W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX10-ERR,W32-ERR %s + scratch_load_ubyte v1, v2, off +// GFX10: encoding: [0x00,0x40,0x20,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte v1, v2, off ; encoding: [0x00,0x40,0x40,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_ubyte v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x20,0xdc,0x02,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_sbyte v1, v2, off +// GFX10: encoding: [0x00,0x40,0x24,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte v1, v2, off ; encoding: [0x00,0x40,0x44,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_sbyte v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x24,0xdc,0x02,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_ushort v1, v2, off +// GFX10: encoding: [0x00,0x40,0x28,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ushort v1, v2, off ; encoding: [0x00,0x40,0x48,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_ushort v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x28,0xdc,0x02,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_sshort v1, v2, off +// GFX10: encoding: [0x00,0x40,0x2c,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sshort v1, v2, 
off ; encoding: [0x00,0x40,0x4c,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_sshort v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x2c,0xdc,0x02,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_dword v1, v2, off +// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_dword v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x30,0xdc,0x02,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_dwordx2 v[1:2], v3, off +// GFX10: encoding: [0x00,0x40,0x34,0xdc,0x03,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx2 v[1:2], v3, off ; encoding: [0x00,0x40,0x54,0xdc,0x03,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_dwordx2 v[1:2], v3, off dlc +// GFX10: encoding: [0x00,0x50,0x34,0xdc,0x03,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_dwordx3 v[1:3], v4, off +// GFX10: encoding: [0x00,0x40,0x3c,0xdc,0x04,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx3 v[1:3], v4, off ; encoding: [0x00,0x40,0x58,0xdc,0x04,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU +scratch_load_dwordx3 v[1:3], v4, off dlc +// GFX10: encoding: [0x00,0x50,0x3c,0xdc,0x04,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_load_dwordx4 v[1:4], v5, off +// GFX10: encoding: [0x00,0x40,0x38,0xdc,0x05,0x00,0x7d,0x01] // GFX9: scratch_load_dwordx4 v[1:4], v5, off ; encoding: [0x00,0x40,0x5c,0xdc,0x05,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU -// FIXME: VI error should be instruction nto supported + +scratch_load_dwordx4 v[1:4], v5, off dlc +// GFX10: encoding: [0x00,0x50,0x38,0xdc,0x05,0x00,0x7d,0x01] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction scratch_load_dword v1, v2, off offset:0 +// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off ; encoding: [0x00,0x40,0x50,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: error: not a valid operand. scratch_load_dword v1, v2, off offset:4095 +// GFX10-ERR: error: invalid operand for instruction // GFX9: scratch_load_dword v1, v2, off offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: error: not a valid operand. scratch_load_dword v1, v2, off offset:-1 +// GFX10: encoding: [0xff,0x4f,0x30,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_dword v1, v2, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: error: not a valid operand. scratch_load_dword v1, v2, off offset:-4096 +// GFX10-ERR: error: invalid operand for instruction // GFX9: scratch_load_dword v1, v2, off offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: error: not a valid operand. scratch_load_dword v1, v2, off offset:4096 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: not a valid operand. scratch_load_dword v1, v2, off offset:-4097 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: not a valid operand. 
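
The scratch encodings repeat the same patterns: dlc again flips bit 12 of the first dword (byte 1: 0x40 -> 0x50), and the gfx10 offset range is the same signed 12 bits as global. Reusing the decodeFlatCachePolicy sketch from earlier as a cross-check:

  #include <cassert>

  int main() {
    // scratch_load_dword v1, v2, off dlc -> [0x00,0x50,0x30,0xdc,...]
    assert(decodeFlatCachePolicy(0xdc305000).DLC);
    // same instruction without dlc      -> [0x00,0x40,0x30,0xdc,...]
    assert(!decodeFlatCachePolicy(0xdc304000).DLC);
  }
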
scratch_store_byte v1, v2, off +// GFX10: encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_byte v1, v2, off ; encoding: [0x00,0x40,0x60,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +scratch_store_byte v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x60,0xdc,0x01,0x02,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_store_short v1, v2, off +// GFX10: encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_short v1, v2, off ; encoding: [0x00,0x40,0x68,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +scratch_store_short v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x68,0xdc,0x01,0x02,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_store_dword v1, v2, off +// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dword v1, v2, off ; encoding: [0x00,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +scratch_store_dword v1, v2, off dlc +// GFX10: encoding: [0x00,0x50,0x70,0xdc,0x01,0x02,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_store_dwordx2 v1, v[2:3], off +// GFX10: encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx2 v1, v[2:3], off ; encoding: [0x00,0x40,0x74,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +scratch_store_dwordx2 v1, v[2:3], off dlc +// GFX10: encoding: [0x00,0x50,0x74,0xdc,0x01,0x02,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_store_dwordx3 v1, v[2:4], off +// GFX10: encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx3 v1, v[2:4], off ; encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +scratch_store_dwordx3 v1, v[2:4], off dlc +// GFX10: encoding: [0x00,0x50,0x7c,0xdc,0x01,0x02,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_store_dwordx4 v1, v[2:5], off +// GFX10: encoding: [0x00,0x40,0x78,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dwordx4 v1, v[2:5], off ; encoding: [0x00,0x40,0x7c,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: instruction not supported on this GPU +scratch_store_dwordx4 v1, v[2:5], off dlc +// GFX10: encoding: [0x00,0x50,0x78,0xdc,0x01,0x02,0x7d,0x00] +// GFX9-ERR: error: failed parsing operand +// VI-ERR: error: invalid operand for instruction + scratch_store_dword v1, v2, off offset:12 +// GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7d,0x00] // GFX9: scratch_store_dword v1, v2, off offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x01,0x02,0x7f,0x00] // VI-ERR: error: not a valid operand scratch_load_dword v1, off, s1 +// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] // GFX9: scratch_load_dword v1, off, s1 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] // VI-ERR: instruction not supported on this GPU scratch_load_dword v1, off, s1 offset:32 +// GFX10: encoding: [0x20,0x40,0x30,0xdc,0x00,0x00,0x01,0x01] // GFX9: scratch_load_dword v1, off, s1 offset:32 ; encoding: [0x20,0x40,0x50,0xdc,0x00,0x00,0x01,0x01] // VI-ERR: error: not a valid operand scratch_store_dword off, v2, s1 +// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_dword off, v2, s1 ; encoding: 
[0x00,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // VI-ERR: instruction not supported on this GPU scratch_store_dword off, v2, s1 offset:12 +// GFX10: encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_dword off, v2, s1 offset:12 ; encoding: [0x0c,0x40,0x70,0xdc,0x00,0x02,0x01,0x00] // VI-ERR: error: not a valid operand // FIXME: Should error about multiple offsets scratch_load_dword v1, v2, s1 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: invalid operand for instruction scratch_load_dword v1, v2, s1 offset:32 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: not a valid operand scratch_store_dword v1, v2, s1 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: invalid operand for instruction scratch_store_dword v1, v2, s1 offset:32 +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: not a valid operand scratch_load_dword v1, off, exec_hi +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: invalid operand for instruction scratch_store_dword off, v2, exec_hi +// GFX10-ERR: error: invalid operand for instruction // GFX9-ERR: error: invalid operand for instruction // VI-ERR: error: invalid operand for instruction scratch_load_dword v1, off, exec_lo +// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7e,0x01] // GFX9: scratch_load_dword v1, off, exec_lo ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7e,0x01] // VI-ERR: instruction not supported on this GPU scratch_store_dword off, v2, exec_lo +// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] // GFX9: scratch_store_dword off, v2, exec_lo ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7e,0x00] // VI-ERR: instruction not supported on this GPU scratch_load_dword v1, off, m0 +// GFX10: encoding: [0x00,0x40,0x30,0xdc,0x00,0x00,0x7c,0x01] // GFX9: scratch_load_dword v1, off, m0 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x7c,0x01] // VI-ERR: instruction not supported on this GPU scratch_store_dword off, v2, m0 +// GFX10: encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] // GFX9: scratch_store_dword off, v2, m0 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x7c,0x00] // VI-ERR: instruction not supported on this GPU scratch_load_ubyte_d16 v1, v2, off +// GFX10: encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x80,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU scratch_load_ubyte_d16_hi v1, v2, off +// GFX10: encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_ubyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x84,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU scratch_load_sbyte_d16 v1, v2, off +// GFX10: encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte_d16 v1, v2, off ; encoding: [0x00,0x40,0x88,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU scratch_load_sbyte_d16_hi v1, v2, off +// GFX10: encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_sbyte_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x8c,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU scratch_load_short_d16 v1, v2, off +// GFX10: encoding: 
[0x00,0x40,0x90,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_short_d16 v1, v2, off ; encoding: [0x00,0x40,0x90,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU scratch_load_short_d16_hi v1, v2, off +// GFX10: encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7d,0x01] // GFX9: scratch_load_short_d16_hi v1, v2, off ; encoding: [0x00,0x40,0x94,0xdc,0x02,0x00,0x7f,0x01] // VI-ERR: instruction not supported on this GPU scratch_store_byte_d16_hi off, v2, s1 +// GFX10: encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_byte_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x01,0x00] // VI-ERR: instruction not supported on this GPU scratch_store_short_d16_hi off, v2, s1 +// GFX10: encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00] // GFX9: scratch_store_short_d16_hi off, v2, s1 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00] // VI-ERR: instruction not supported on this GPU
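
Any single check line in these files can be reproduced in isolation with the same llvm-mc invocation the RUN lines use: feeding the one-line input "scratch_store_short_d16_hi off, v2, s1" to llvm-mc -arch=amdgcn -mcpu=gfx1010 -show-encoding should print the GFX10 encoding checked above, [0x00,0x40,0x6c,0xdc,0x00,0x02,0x01,0x00].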