Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2733,7 +2733,7 @@
   unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
                                       getIndirectIndexBegin(*MBB->getParent()));
 
-  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC_V1))
           .addOperand(I->getOperand(0))
           .addOperand(I->getOperand(1))
           .addReg(IndirectBaseReg)
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1969,9 +1969,9 @@
 } // End UseNamedOperandTable = 1
 
-def SI_INDIRECT_SRC : InstSI <
+class SI_INDIRECT_SRC<RegisterClass rc> : InstSI <
   (outs VGPR_32:$dst, SReg_64:$temp),
-  (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+  (ins rc:$src, VSrc_32:$idx, i32imm:$off),
   "si_indirect_src $dst, $temp, $src, $idx, $off",
   []
 >;
 
@@ -1985,6 +1985,13 @@
   let Constraints = "$src = $dst";
 }
 
+// TODO: We can support indirect SGPR access.
+def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
+def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
+def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
+def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
+def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
+
 def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
 def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
 def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
@@ -3023,42 +3030,42 @@
 /********** Indirect adressing **********/
 /********** ====================== **********/
 
-multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
 
   // 1. Extract with offset
   def : Pat<
     (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
-    (SI_INDIRECT_SRC $vec, $idx, imm:$off)
+    (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off)
   >;
 
   // 2. Extract without offset
   def : Pat<
     (eltvt (vector_extract vt:$vec, i32:$idx)),
-    (SI_INDIRECT_SRC $vec, $idx, 0)
+    (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0)
   >;
 
   // 3. Insert with offset
   def : Pat<
     (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
-    (IndDst $vec, $idx, imm:$off, $val)
+    (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val)
   >;
 
   // 4. Insert without offset
   def : Pat<
     (vector_insert vt:$vec, eltvt:$val, i32:$idx),
-    (IndDst $vec, $idx, 0, $val)
+    (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val)
   >;
 }
 
-defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
+defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
+defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
+defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
 
-defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
+defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
+defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
+defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
 
 //===----------------------------------------------------------------------===//
 // Conversion Patterns
Index: lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -541,7 +541,11 @@
           Branch(MI);
           break;
 
-        case AMDGPU::SI_INDIRECT_SRC:
+        case AMDGPU::SI_INDIRECT_SRC_V1:
+        case AMDGPU::SI_INDIRECT_SRC_V2:
+        case AMDGPU::SI_INDIRECT_SRC_V4:
+        case AMDGPU::SI_INDIRECT_SRC_V8:
+        case AMDGPU::SI_INDIRECT_SRC_V16:
           IndirectSrc(MI);
           break;
 
Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll
===================================================================
--- test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -5,23 +5,52 @@
 ; indexing of vectors.
 
 ; CHECK-LABEL: {{^}}extract_w_offset:
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
 ; CHECK: s_mov_b32 m0
 ; CHECK-NEXT: v_movrels_b32_e32
 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
 entry:
-  %0 = add i32 %in, 1
-  %1 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %0
-  store float %1, float addrspace(1)* %out
+  %idx = add i32 %in, 1
+  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %idx
+  store float %elt, float addrspace(1)* %out
+  ret void
+}
+
+; XXX: Could do v_or_b32 directly
+; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector:
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
+define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
+entry:
+  %idx = add i32 %in, 1
+  %vec = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
+  %elt = extractelement <4 x i32> %vec, i32 %idx
+  store i32 %elt, i32 addrspace(1)* %out
   ret void
 }
 
 ; CHECK-LABEL: {{^}}extract_wo_offset:
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
 ; CHECK: s_mov_b32 m0
 ; CHECK-NEXT: v_movrels_b32_e32
 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
 entry:
-  %0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
-  store float %0, float addrspace(1)* %out
+  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
+  store float %elt, float addrspace(1)* %out
   ret void
 }
 
@@ -37,6 +66,19 @@
   ret void
 }
 
+; CHECK-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
+define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
+entry:
+  %index = add i32 %offset, -512
+  %or = or <4 x i32> %vec0, %vec1
+  %value = extractelement <4 x i32> %or, i32 %index
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
+
 ; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
 ; The offset depends on the register that holds the first element of the vector.
 ; CHECK: v_readfirstlane_b32
@@ -87,6 +129,21 @@
   ret void
 }
 
+; The vector indexed into is originally loaded into an SGPR rather
+; than built with a reg_sequence
+
+; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
+define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
+entry:
+  %index = add i32 %offset, -512
+  %value = insertelement <4 x i32> %vec, i32 5, i32 %index
+  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
 ; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
 ; The offset depends on the register that holds the first element of the vector.
 ; CHECK: v_readfirstlane_b32