Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -37,6 +37,12 @@
 
 using namespace llvm;
 
+static cl::opt<bool> EnableVGPRIndexMode(
+  "amdgpu-vgpr-index-mode",
+  cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
+  cl::init(false));
+
+
 static unsigned findFirstFreeSGPR(CCState &CCInfo) {
   unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
   for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -1129,7 +1135,8 @@
                                                  unsigned ResultReg,
                                                  unsigned PhiReg,
                                                  unsigned InitSaveExecReg,
-                                                 int Offset) {
+                                                 int Offset,
+                                                 bool UseGPRIdxMode) {
   MachineBasicBlock::iterator I = LoopBB.begin();
 
   unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -1158,14 +1165,31 @@
     .addReg(CurrentIdxReg)
     .addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
 
-  // Move index from VCC into M0
-  if (Offset == 0) {
-    BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
-      .addReg(CurrentIdxReg, RegState::Kill);
+  if (UseGPRIdxMode) {
+    unsigned IdxReg;
+    if (Offset == 0) {
+      IdxReg = CurrentIdxReg;
+    } else {
+      IdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), IdxReg)
+        .addReg(CurrentIdxReg, RegState::Kill)
+        .addImm(Offset);
+    }
+
+    MachineInstr *SetIdx =
+      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_IDX))
+      .addReg(IdxReg, RegState::Kill);
+    SetIdx->getOperand(2).setIsUndef(true);
   } else {
-    BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
-      .addReg(CurrentIdxReg, RegState::Kill)
-      .addImm(Offset);
+    // Move index from VCC into M0
+    if (Offset == 0) {
+      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+        .addReg(CurrentIdxReg, RegState::Kill);
+    } else {
+      BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+        .addReg(CurrentIdxReg, RegState::Kill)
+        .addImm(Offset);
+    }
   }
 
   // Update EXEC, save the original EXEC value to VCC.
@@ -1200,7 +1224,8 @@
                                       MachineInstr &MI,
                                       unsigned InitResultReg,
                                       unsigned PhiReg,
-                                      int Offset) {
+                                      int Offset,
+                                      bool UseGPRIdxMode) {
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const DebugLoc &DL = MI.getDebugLoc();
@@ -1239,7 +1264,7 @@
 
   auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
                                       InitResultReg, DstReg, PhiReg, TmpExec,
-                                      Offset);
+                                      Offset, UseGPRIdxMode);
 
   MachineBasicBlock::iterator First = RemainderBB->begin();
   BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
@@ -1270,7 +1295,9 @@
 static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
                                  MachineRegisterInfo &MRI,
                                  MachineInstr &MI,
-                                 int Offset) {
+                                 int Offset,
+                                 bool UseGPRIdxMode,
+                                 bool IsIndirectSrc) {
   MachineBasicBlock *MBB = MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
@@ -1283,6 +1310,32 @@
   if (!TII->getRegisterInfo().isSGPRClass(IdxRC))
     return false;
 
+  if (UseGPRIdxMode) {
+    unsigned IdxMode = IsIndirectSrc ?
+      VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
+    if (Offset == 0) {
+      MachineInstr *SetOn =
+        BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+        .addOperand(*Idx)
+        .addImm(IdxMode);
+
+      SetOn->getOperand(3).setIsUndef();
+    } else {
+      unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+      BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
+        .addOperand(*Idx)
+        .addImm(Offset);
+      MachineInstr *SetOn =
+        BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+        .addReg(Tmp, RegState::Kill)
+        .addImm(IdxMode);
+
+      SetOn->getOperand(3).setIsUndef();
+    }
+
+    return true;
+  }
+
   if (Offset == 0) {
     BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
       .addOperand(*Idx);
@@ -1314,18 +1367,33 @@
   std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
                                                          SrcVec->getReg(), Offset);
 
-  if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset)) {
+  bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+
+  if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
     MachineBasicBlock::iterator I(&MI);
     const DebugLoc &DL = MI.getDebugLoc();
 
-    BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
-      .addReg(SrcVec->getReg(), RegState::Undef, SubReg)
-      .addReg(SrcVec->getReg(), RegState::Implicit);
+    if (UseGPRIdxMode) {
+      // TODO: Look at the uses to avoid the copy. This may require rescheduling
+      // to avoid interfering with other uses, so probably requires a new
+      // optimization pass.
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
+        .addReg(SrcVec->getReg(), RegState::Undef, SubReg)
+        .addReg(SrcVec->getReg(), RegState::Implicit)
+        .addReg(AMDGPU::M0, RegState::Implicit);
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+    } else {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+        .addReg(SrcVec->getReg(), RegState::Undef, SubReg)
+        .addReg(SrcVec->getReg(), RegState::Implicit);
+    }
+
     MI.eraseFromParent();
     return &MBB;
   }
+
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
 
@@ -1334,15 +1402,32 @@
   BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
 
+  if (UseGPRIdxMode) {
+    MachineInstr *SetOn = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+      .addImm(0) // Reset inside loop.
+      .addImm(VGPRIndexMode::SRC0_ENABLE);
+    SetOn->getOperand(3).setIsUndef();
 
-  auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset);
-  BuildMI(*InsPt->getParent(), InsPt, DL,
-          TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
-    .addReg(SrcVec->getReg(), RegState::Undef, SubReg)
-    .addReg(SrcVec->getReg(), RegState::Implicit);
+    // Disable again after the loop.
+    BuildMI(MBB, std::next(I), DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+  }
+
+  auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset, UseGPRIdxMode);
+  MachineBasicBlock *LoopBB = InsPt->getParent();
 
-  return InsPt->getParent();
+  if (UseGPRIdxMode) {
+    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
+      .addReg(SrcVec->getReg(), RegState::Undef, SubReg)
+      .addReg(SrcVec->getReg(), RegState::Implicit)
+      .addReg(AMDGPU::M0, RegState::Implicit);
+  } else {
+    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+      .addReg(SrcVec->getReg(), RegState::Undef, SubReg)
+      .addReg(SrcVec->getReg(), RegState::Implicit);
+  }
+
+  return LoopBB;
 }
 
 static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
@@ -1367,6 +1452,8 @@
   std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
                                                          SrcVec->getReg(), Offset);
 
+  bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+
   if (Idx->getReg() == AMDGPU::NoRegister) {
     MachineBasicBlock::iterator I(&MI);
     const DebugLoc &DL = MI.getDebugLoc();
@@ -1382,23 +1469,36 @@
     return &MBB;
   }
 
-  const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
-
-  if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset)) {
+  if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, false)) {
     MachineBasicBlock::iterator I(&MI);
     const DebugLoc &DL = MI.getDebugLoc();
 
-    MachineInstr *MovRel =
-      BuildMI(MBB, I, DL, MovRelDesc)
-      .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
-      .addOperand(*Val)
-      .addReg(Dst, RegState::ImplicitDefine)
-      .addReg(SrcVec->getReg(), RegState::Implicit);
+    if (UseGPRIdxMode) {
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
+        .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
+        .addOperand(*Val)
+        .addReg(Dst, RegState::ImplicitDefine)
+        .addReg(SrcVec->getReg(), RegState::Implicit)
+        .addReg(AMDGPU::M0, RegState::Implicit);
 
-    const int ImpDefIdx = MovRelDesc.getNumOperands() +
-      MovRelDesc.getNumImplicitUses();
-    const int ImpUseIdx = ImpDefIdx + 1;
+      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+    } else {
+      const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
+
+      MachineInstr *MovRel =
+        BuildMI(MBB, I, DL, MovRelDesc)
+        .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
+        .addOperand(*Val)
+        .addReg(Dst, RegState::ImplicitDefine)
+        .addReg(SrcVec->getReg(), RegState::Implicit);
+
+      const int ImpDefIdx = MovRelDesc.getNumOperands() +
+        MovRelDesc.getNumImplicitUses();
+      const int ImpUseIdx = ImpDefIdx + 1;
+
+      MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+    }
 
-    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
     MI.eraseFromParent();
     return &MBB;
   }
@@ -1407,25 +1507,50 @@
   MRI.clearKillFlags(Val->getReg());
 
   const DebugLoc &DL = MI.getDebugLoc();
+
+  if (UseGPRIdxMode) {
+    MachineBasicBlock::iterator I(&MI);
+
+    MachineInstr *SetOn = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+      .addImm(0) // Reset inside loop.
+      .addImm(VGPRIndexMode::DST_ENABLE);
+    SetOn->getOperand(3).setIsUndef();
+
+    // Disable again after the loop.
+    BuildMI(MBB, std::next(I), DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+  }
+
   unsigned PhiReg = MRI.createVirtualRegister(VecRC);
 
-  auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset);
+  auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg,
+                              Offset, UseGPRIdxMode);
+  MachineBasicBlock *LoopBB = InsPt->getParent();
 
-  // vdst is not actually read and just provides the base register index.
-  MachineInstr *MovRel =
-    BuildMI(*InsPt->getParent(), InsPt, DL, MovRelDesc)
+  if (UseGPRIdxMode) {
+    BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
+      .addReg(PhiReg, RegState::Undef, SubReg) // vdst
+      .addOperand(*Val) // src0
+      .addReg(Dst, RegState::ImplicitDefine)
+      .addReg(PhiReg, RegState::Implicit)
+      .addReg(AMDGPU::M0, RegState::Implicit);
+  } else {
+    const MCInstrDesc &MovRelDesc = TII->get(AMDGPU::V_MOVRELD_B32_e32);
+    // vdst is not actually read and just provides the base register index.
+    MachineInstr *MovRel =
+      BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
       .addReg(PhiReg, RegState::Undef, SubReg) // vdst
      .addOperand(*Val)
      .addReg(Dst, RegState::ImplicitDefine)
      .addReg(PhiReg, RegState::Implicit);
 
-  const int ImpDefIdx = MovRelDesc.getNumOperands() +
-    MovRelDesc.getNumImplicitUses();
-  const int ImpUseIdx = ImpDefIdx + 1;
+    const int ImpDefIdx = MovRelDesc.getNumOperands() +
+      MovRelDesc.getNumImplicitUses();
+    const int ImpUseIdx = ImpDefIdx + 1;
 
-  MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+    MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
+  }
 
-  return InsPt->getParent();
+  return LoopBB;
 }
 
 MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
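
The intended effect of the lowering changes above is easiest to see in the emitted machine code. For a uniform (SGPR) index, the movrel path programs M0 once, while the index-mode path brackets an ordinary v_mov_b32 in an s_set_gpr_idx_on/off pair. A sketch of the two sequences for a single extract, with illustrative register numbers (the tests below check the same shape with FileCheck patterns):

  ; movrel path
  s_mov_b32 m0, s2
  v_movrels_b32_e32 v1, v0   ; reads v[0 + M0]

  ; VGPR index mode path
  s_set_gpr_idx_on s2, src0
  v_mov_b32_e32 v1, v0       ; src0 rebased: reads v[0 + idx]
  s_set_gpr_idx_off

For a divergent (VGPR) index, s_set_gpr_idx_on 0 is instead emitted once before the v_readfirstlane waterfall loop, each iteration reprograms the index with s_set_gpr_idx_idx, and s_set_gpr_idx_off is emitted after the loop.
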
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1476,6 +1476,20 @@
     .addImm(0); // omod
 }
 
+// It's not generally safe to move VALU instructions across these since it will
+// start using the register as a base index rather than directly.
+// XXX - Why isn't hasSideEffects sufficient for these?
+static bool changesVGPRIndexingMode(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case AMDGPU::S_SET_GPR_IDX_ON:
+  case AMDGPU::S_SET_GPR_IDX_MODE:
+  case AMDGPU::S_SET_GPR_IDX_OFF:
+    return true;
+  default:
+    return false;
+  }
+}
+
 bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                        const MachineBasicBlock *MBB,
                                        const MachineFunction &MF) const {
@@ -1485,7 +1499,8 @@
   // when they operate on VGPRs. Treating EXEC modifications as scheduling
   // boundaries prevents incorrect movements of such instructions.
   return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
-         MI.modifiesRegister(AMDGPU::EXEC, &RI);
+         MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
+         changesVGPRIndexingMode(MI);
 }
 
 bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
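
The scheduling-boundary change guards against a reordering hazard: while the index mode is active, a VALU instruction's VGPR operand is interpreted relative to M0 rather than directly. A minimal sketch of the motion this forbids (registers illustrative):

  s_set_gpr_idx_on s2, dst
  v_mov_b32_e32 v0, v8    ; writes v[0 + M0] while the mode is on
  s_set_gpr_idx_off
  v_mov_b32_e32 v1, v9    ; plain move; hoisting it above the
                          ; s_set_gpr_idx_off would clobber v[1 + M0]

As for the XXX above: hasSideEffects presumably pins the s_set_gpr_idx_* instructions themselves, but it does not create dependences on unrelated VALU instructions, which is what the explicit isSchedulingBoundary check provides.
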
Index: lib/Target/AMDGPU/VOP1Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP1Instructions.td
+++ lib/Target/AMDGPU/VOP1Instructions.td
@@ -527,6 +527,17 @@
 defm V_SIN_F16 : VOP1_Real_vi <0x49>;
 defm V_COS_F16 : VOP1_Real_vi <0x4a>;
 
+
+// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
+// indexing mode. vdst can't be treated as a def for codegen purposes,
+// and an implicit use and def of the super register should be added.
+def V_MOV_B32_indirect : VPseudoInstSI<(outs),
+  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
+  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
+                                        getVOPSrc0ForVT<i32>.ret:$src0)> {
+  let VOP1 = 1;
+}
+
 let Predicates = [isVI] in {
 
 def : Pat <
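
V_MOV_B32_indirect is only a codegen-time wrapper: $vdst is modeled as a use (the real def is the implicit super-register def added by the custom inserter), and PseudoInstExpansion lowers it to the ordinary VI encoding of v_mov_b32_e32. Since the whole path is gated on both ST.hasVGPRIndexMode() and the new off-by-default flag, existing subtargets keep the movrel lowering; the third RUN line added below opts in explicitly:

  llc -march=amdgcn -mcpu=tonga -amdgpu-vgpr-index-mode -verify-machineinstrs
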
Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll
===================================================================
--- test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
+; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s
 
 ; Tests for indirect addressing on SI, which is implemented using dynamic
 ; indexing of vectors.
@@ -10,8 +11,13 @@
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
 ; GCN-DAG: v_mov_b32_e32 [[BASEREG:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
-; GCN-DAG: s_mov_b32 m0, [[IN]]
-; GCN: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]
+
+; MOVREL-DAG: s_mov_b32 m0, [[IN]]
+; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]
+
+; IDXMODE: s_set_gpr_idx_on [[IN]], src0{{$}}
+; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, [[BASEREG]]
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
 entry:
   %idx = add i32 %in, 1
@@ -22,7 +28,7 @@
 
 ; XXX: Could do v_or_b32 directly
 ; GCN-LABEL: {{^}}extract_w_offset_salu_use_vector:
-; GCN: s_mov_b32 m0
+; MOVREL: s_mov_b32 m0
 ; GCN-DAG: s_or_b32
 ; GCN-DAG: s_or_b32
 ; GCN-DAG: s_or_b32
@@ -31,7 +37,12 @@
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN: v_movrels_b32_e32
+
+; MOVREL: v_movrels_b32_e32
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0{{$}}
+; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
 entry:
   %idx = add i32 %in, 1
@@ -47,8 +58,13 @@
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
 ; GCN-DAG: v_mov_b32_e32 [[BASEREG:v[0-9]+]], 1.0
-; GCN-DAG: s_mov_b32 m0, [[IN]]
-; GCN: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]
+
+; MOVREL-DAG: s_mov_b32 m0, [[IN]]
+; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]
+
+; IDXMODE: s_set_gpr_idx_on [[IN]], src0{{$}}
+; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, [[BASEREG]]
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
 entry:
   %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
@@ -58,8 +74,13 @@
 
 ; GCN-LABEL: {{^}}extract_neg_offset_sgpr:
 ; The offset depends on the register that holds the first element of the vector.
-; GCN: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
-; GCN: v_movrels_b32_e32 v{{[0-9]}}, v0
+; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0
+
+; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
+; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}}
+; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
 entry:
   %index = add i32 %offset, -512
@@ -70,8 +91,13 @@
 
 ; GCN-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
 ; The offset depends on the register that holds the first element of the vector.
-; GCN: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
-; GCN: v_movrels_b32_e32 v{{[0-9]}}, v0
+; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0
+
+; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
+; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}}
+; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
 entry:
   %index = add i32 %offset, -512
@@ -85,14 +111,24 @@
 ; The offset depends on the register that holds the first element of the vector.
 ; FIXME: The waitcnt for the argument load can go after the loop
+; IDXMODE: s_set_gpr_idx_on 0, src0
 ; GCN: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
 ; GCN: s_waitcnt lgkmcnt(0)
 
 ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v{{[0-9]+}}
-; GCN: s_add_i32 m0, [[READLANE]], 0xfffffe0
-; GCN: v_movrels_b32_e32 [[RESULT:v[0-9]+]], v1
+
+; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe0
+; MOVREL: s_and_saveexec_b64 vcc, vcc
+; MOVREL: v_movrels_b32_e32 [[RESULT:v[0-9]+]], v1
+
+; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00
+; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
+; IDXMODE: s_and_saveexec_b64 vcc, vcc
+; IDXMODE: v_mov_b32_e32 [[RESULT:v[0-9]+]], v1
+
 ; GCN: s_cbranch_execnz
+; IDXMODE: s_set_gpr_idx_off
 ; GCN: buffer_store_dword [[RESULT]]
 define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
 entry:
@@ -114,8 +150,8 @@
 
 ; GCN-LABEL: {{^}}insert_undef_offset_sgpr_vector_src:
 ; GCN-DAG: buffer_load_dwordx4
-; GCN-DAG: s_mov_b32 m0,
-; GCN: v_movreld_b32
+; MOVREL-DAG: s_mov_b32 m0,
+; MOVREL: v_movreld_b32
 define void @insert_undef_offset_sgpr_vector_src(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 entry:
   %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
@@ -126,14 +162,15 @@
 
 ; GCN-LABEL: {{^}}insert_w_offset:
 ; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
-; GCN-DAG: s_mov_b32 m0, [[IN]]
+; MOVREL-DAG: s_mov_b32 m0, [[IN]]
 ; GCN-DAG: v_mov_b32_e32 v[[ELT0:[0-9]+]], 1.0
 ; GCN-DAG: v_mov_b32_e32 v[[ELT1:[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 v[[ELT2:[0-9]+]], 0x40400000
 ; GCN-DAG: v_mov_b32_e32 v[[ELT3:[0-9]+]], 4.0
 ; GCN-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x40a00000
-; GCN: v_movreld_b32_e32 v[[ELT1]], v[[INS]]
-; GCN: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
+
+; MOVREL: v_movreld_b32_e32 v[[ELT1]], v[[INS]]
+; MOVREL: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
 define void @insert_w_offset(<4 x float> addrspace(1)* %out, i32 %in) {
 entry:
   %0 = add i32 %in, 1
@@ -144,8 +181,14 @@
 
 ; GCN-LABEL: {{^}}insert_wo_offset:
 ; GCN: s_load_dword [[IN:s[0-9]+]]
-; GCN: s_mov_b32 m0, [[IN]]
-; GCN: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
+
+; MOVREL: s_mov_b32 m0, [[IN]]
+; MOVREL: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
+
+; IDXMODE: s_set_gpr_idx_on [[IN]], dst
+; IDXMODE-NEXT: v_mov_b32_e32 v[[ELT0:[0-9]+]], v{{[0-9]+}}
+; IDXMODE-NEXT: s_set_gpr_idx_off
+
 ; GCN: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
 define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
 entry:
@@ -156,8 +199,13 @@
 
 ; GCN-LABEL: {{^}}insert_neg_offset_sgpr:
 ; The offset depends on the register that holds the first element of the vector.
-; GCN: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
-; GCN: v_movreld_b32_e32 v0, 5
+; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; MOVREL: v_movreld_b32_e32 v0, 5
+
+; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
+; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst
+; IDXMODE-NEXT: v_mov_b32_e32 v0, 5
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
 entry:
   %index = add i32 %offset, -512
@@ -171,8 +219,13 @@
 
 ; GCN-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
 ; The offset depends on the register that holds the first element of the vector.
-; GCN: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
-; GCN: v_movreld_b32_e32 v0, 5
+; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; MOVREL: v_movreld_b32_e32 v0, 5
+
+; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
+; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst
+; IDXMODE-NEXT: v_mov_b32_e32 v0, 5
+; IDXMODE-NEXT: s_set_gpr_idx_off
 define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
 entry:
   %index = add i32 %offset, -512
@@ -194,11 +247,21 @@
 
 ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]:
 ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]
-; GCN: s_add_i32 m0, [[READLANE]], 0xfffffe00
-; GCN: v_movreld_b32_e32 [[VEC_ELT0]], 5
-; GCN: s_cbranch_execnz [[LOOPBB]]
+
+; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe00
+; MOVREL: s_and_saveexec_b64 vcc, vcc
+; MOVREL: v_movreld_b32_e32 [[VEC_ELT0]], 5
+
+; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
+; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
+; IDXMODE: s_and_saveexec_b64 vcc, vcc
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 5
+
+; GCN: s_cbranch_execnz [[LOOPBB]]
 
 ; GCN: s_mov_b64 exec, [[SAVEEXEC]]
+
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: buffer_store_dword
 define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
 entry:
@@ -217,14 +280,24 @@
 ; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}}
 ; GCN-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x1f4{{$}}
+; IDXMODE: s_set_gpr_idx_on 0, dst
+
 ; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
 ; GCN: s_waitcnt lgkmcnt(0)
 
 ; The offset depends on the register that holds the first element of the vector.
 ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]
-; GCN: s_add_i32 m0, [[READLANE]], -16
-; GCN: v_movreld_b32_e32 [[VEC_ELT0]], [[VAL]]
+
+; MOVREL: s_add_i32 m0, [[READLANE]], -16
+; MOVREL: v_movreld_b32_e32 [[VEC_ELT0]], [[VAL]]
+
+; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], [[READLANE]], -16
+; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
+; IDXMODE: v_mov_b32_e32 [[VEC_ELT0]], [[VAL]]
+
 ; GCN: s_cbranch_execnz
+
+; IDXMODE: s_set_gpr_idx_off
 define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
 entry:
   %id = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -247,32 +320,52 @@
 
 ; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], [[S_ELT0]]
 ; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], [[S_ELT1]]
 
+; IDXMODE: s_set_gpr_idx_on 0, src0
+
 ; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec
 ; GCN: s_waitcnt vmcnt(0)
 
 ; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
 ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
 ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
-; GCN: s_mov_b32 m0, [[READLANE]]
-; GCN: s_and_saveexec_b64 vcc, vcc
-; GCN: v_movrels_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]
+
+; MOVREL: s_mov_b32 m0, [[READLANE]]
+; MOVREL: s_and_saveexec_b64 vcc, vcc
+; MOVREL: v_movrels_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]
+
+; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
+; IDXMODE: s_and_saveexec_b64 vcc, vcc
+; IDXMODE: v_mov_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]
+
 ; GCN-NEXT: s_xor_b64 exec, exec, vcc
 ; GCN-NEXT: s_cbranch_execnz [[LOOP0]]
 
 ; FIXME: Redundant copy
 ; GCN: s_mov_b64 exec, [[MASK]]
+
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: v_mov_b32_e32 [[VEC_ELT1_2:v[0-9]+]], [[S_ELT1]]
+
+; IDXMODE: s_set_gpr_idx_on 0, src0
 ; GCN: s_mov_b64 [[MASK2:s\[[0-9]+:[0-9]+\]]], exec
 
 ; GCN: [[LOOP1:BB[0-9]+_[0-9]+]]:
 ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
 ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
-; GCN: s_mov_b32 m0, [[READLANE]]
-; GCN: s_and_saveexec_b64 vcc, vcc
-; GCN-NEXT: v_movrels_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]
+
+; MOVREL: s_mov_b32 m0, [[READLANE]]
+; MOVREL: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: v_movrels_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]
+
+; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
+; IDXMODE: s_and_saveexec_b64 vcc, vcc
+; IDXMODE-NEXT: v_mov_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]
+
 ; GCN-NEXT: s_xor_b64 exec, exec, vcc
 ; GCN: s_cbranch_execnz [[LOOP1]]
 
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: buffer_store_dword [[MOVREL0]]
 ; GCN: buffer_store_dword [[MOVREL1]]
 define void @extract_vgpr_offset_multiple_in_block(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
@@ -308,25 +401,42 @@
 
 ; GCN: v_mov_b32_e32 v[[VEC_ELT1:[0-9]+]], s{{[0-9]+}}
 ; GCN: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
 
+; IDXMODE: s_set_gpr_idx_on 0, dst
+
 ; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
 ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
 ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
-; GCN: s_mov_b32 m0, [[READLANE]]
-; GCN: s_and_saveexec_b64 vcc, vcc
-; GCN-NEXT: v_movreld_b32_e32 v[[VEC_ELT0]], [[INS0]]
+
+; MOVREL: s_mov_b32 m0, [[READLANE]]
+; MOVREL: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: v_movreld_b32_e32 v[[VEC_ELT0]], [[INS0]]
+
+; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
+; IDXMODE: s_and_saveexec_b64 vcc, vcc
+; IDXMODE-NEXT: v_mov_b32_e32 v[[VEC_ELT0]], [[INS0]]
+
 ; GCN-NEXT: s_xor_b64 exec, exec, vcc
 ; GCN: s_cbranch_execnz [[LOOP0]]
 
 ; FIXME: Redundant copy
 ; GCN: s_mov_b64 exec, [[MASK:s\[[0-9]+:[0-9]+\]]]
+
+; IDXMODE: s_set_gpr_idx_off
+
+; IDXMODE: s_set_gpr_idx_on 0, dst
 ; GCN: s_mov_b64 [[MASK]], exec
 
 ; GCN: [[LOOP1:BB[0-9]+_[0-9]+]]:
 ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
 ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
-; GCN: s_mov_b32 m0, [[READLANE]]
-; GCN: s_and_saveexec_b64 vcc, vcc
-; GCN-NEXT: v_movreld_b32_e32 v[[VEC_ELT1]], 63
+
+; MOVREL: s_mov_b32 m0, [[READLANE]]
+; MOVREL: s_and_saveexec_b64 vcc, vcc
+; MOVREL-NEXT: v_movreld_b32_e32 v[[VEC_ELT1]], 63
+
+; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
+; IDXMODE: s_and_saveexec_b64 vcc, vcc
+; IDXMODE-NEXT: v_mov_b32_e32 v[[VEC_ELT1]], 63
+
 ; GCN-NEXT: s_xor_b64 exec, exec, vcc
 ; GCN: s_cbranch_execnz [[LOOP1]]
@@ -361,14 +471,23 @@
 ; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]
 
 ; GCN: buffer_load_dwordx4
-; GCN: s_mov_b32 m0,
-; GCN: v_movrels_b32_e32
+; MOVREL: s_mov_b32 m0,
+; MOVREL: v_movrels_b32_e32
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0
+; IDXMODE: v_mov_b32_e32
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[BB4]]:
 ; GCN: buffer_load_dwordx4
-; GCN: s_mov_b32 m0,
-; GCN: v_movrels_b32_e32
+; MOVREL: s_mov_b32 m0,
+; MOVREL: v_movrels_b32_e32
+
+; IDXMODE: s_set_gpr_idx_on
+; IDXMODE: v_mov_b32_e32
+; IDXMODE: s_set_gpr_idx_off
 
 ; GCN: [[ENDBB]]:
 ; GCN: buffer_store_dword
@@ -400,14 +519,23 @@
 ; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]
 
 ; GCN: buffer_load_dwordx4
-; GCN: s_mov_b32 m0,
-; GCN: v_movreld_b32_e32
+; MOVREL: s_mov_b32 m0,
+; MOVREL: v_movreld_b32_e32
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst
+; IDXMODE: v_mov_b32_e32
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[BB4]]:
 ; GCN: buffer_load_dwordx4
-; GCN: s_mov_b32 m0,
-; GCN: v_movreld_b32_e32
+; MOVREL: s_mov_b32 m0,
+; MOVREL: v_movreld_b32_e32
+
+; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst
+; IDXMODE: v_mov_b32_e32
+; IDXMODE: s_set_gpr_idx_off
 
 ; GCN: [[ENDBB]]:
 ; GCN: buffer_store_dword
@@ -445,17 +573,27 @@
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
 ; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
 
-; GCN-DAG: s_add_i32 m0, [[ARG]], -16
-; GCN: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
+; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
+; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
 ; GCN-NOT: m0
 
+; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
+; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
+; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
-; GCN: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
+
+; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
+
+; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
+; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
+; IDXMODE: s_set_gpr_idx_off
 
 ; GCN: s_mov_b32 m0, -1
 ; GCN: ds_write_b32
@@ -480,8 +618,13 @@
 
 ; GCN-LABEL: {{^}}extract_largest_inbounds_offset:
 ; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
 ; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
-; GCN: s_mov_b32 m0, [[IDX]]
-; GCN: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]
+; MOVREL: s_mov_b32 m0, [[IDX]]
+; MOVREL: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]
+
+; IDXMODE: s_set_gpr_idx_on [[IDX]], src0
+; IDXMODE: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: buffer_store_dword [[EXTRACT]]
 define void @extract_largest_inbounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
 entry:
@@ -495,8 +638,14 @@
 
 ; GCN-LABEL: {{^}}extract_out_of_bounds_offset:
 ; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
 ; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
-; GCN: s_add_i32 m0, [[IDX]], 4
-; GCN: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]
+; MOVREL: s_add_i32 m0, [[IDX]], 4
+; MOVREL: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]
+
+; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], [[IDX]], 4
+; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], src0
+; IDXMODE: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]
+; IDXMODE: s_set_gpr_idx_off
+
 ; GCN: buffer_store_dword [[EXTRACT]]
 define void @extract_out_of_bounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
 entry:
@@ -514,8 +663,13 @@
 ; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
 ; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
 ; GCN-NOT: [[IDX_SHL]]
-; GCN: s_mov_b32 m0, [[IDX_SHL]]
-; GCN: v_movrels_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+
+; MOVREL: s_mov_b32 m0, [[IDX_SHL]]
+; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+
+; IDXMODE: s_set_gpr_idx_on [[IDX_SHL]], src0
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; IDXMODE: s_set_gpr_idx_off
 define void @extractelement_v4i32_or_index(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx.in) {
 entry:
   %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
@@ -530,8 +684,13 @@
 ; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
 ; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
 ; GCN-NOT: [[IDX_SHL]]
-; GCN: s_mov_b32 m0, [[IDX_SHL]]
-; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+
+; MOVREL: s_mov_b32 m0, [[IDX_SHL]]
+; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+
+; IDXMODE: s_set_gpr_idx_on [[IDX_SHL]], dst
+; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; IDXMODE: s_set_gpr_idx_off
 define void @insertelement_v4f32_or_index(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %idx.in) nounwind {
   %idx.shl = shl i32 %idx.in, 2
   %idx = or i32 %idx.shl, 1
@@ -547,13 +706,17 @@
 
 ; GCN: {{^BB[0-9]+_[0-9]+}}:
 ; GCN: s_mov_b64 exec,
+; IDXMODE: s_set_gpr_idx_off
 
 ; GCN: [[BB2]]:
 ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
 ; GCN: buffer_load_dword
 
 ; GCN: [[REGLOOP:BB[0-9]+_[0-9]+]]:
-; GCN: v_movreld_b32_e32
+; MOVREL: v_movreld_b32_e32
+
+; IDXMODE: s_set_gpr_idx_idx
+; IDXMODE: v_mov_b32_e32
 ; GCN: s_cbranch_execnz [[REGLOOP]]
 define void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
 bb: