diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -279,6 +279,9 @@ setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand); setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand); + setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); + setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); + setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand); setOperationAction(ISD::Constant, MVT::i32, Legal); setOperationAction(ISD::Constant, MVT::i64, Legal); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -157,6 +157,9 @@ addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass); addRegisterClass(MVT::v8f64, &AMDGPU::VReg_512RegClass); + addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass); + addRegisterClass(MVT::v16f64, &AMDGPU::VReg_1024RegClass); + if (Subtarget->has16BitInsts()) { addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); @@ -168,10 +171,8 @@ addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass); } - if (Subtarget->hasMAIInsts()) { - addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); - addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass); - } + addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); + addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass); computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -243,6 +244,8 @@ setOperationAction(ISD::FP_ROUND, MVT::v4f32, Expand); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Expand); setOperationAction(ISD::FP_ROUND, MVT::v8f32, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v16i32, Expand); + 
setOperationAction(ISD::FP_ROUND, MVT::v16f32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); @@ -280,7 +283,7 @@ for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64, - MVT::v32i32, MVT::v32f32 }) { + MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32 }) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: @@ -352,6 +355,20 @@ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v16i32); } + for (MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); + AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32); + + setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote); + AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v32i32); + + setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v32i32); + } + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); @@ -3916,12 +3933,14 @@ case AMDGPU::SI_INDIRECT_SRC_V4: case AMDGPU::SI_INDIRECT_SRC_V8: case AMDGPU::SI_INDIRECT_SRC_V16: + case AMDGPU::SI_INDIRECT_SRC_V32: return emitIndirectSrc(MI, *BB, *getSubtarget()); case AMDGPU::SI_INDIRECT_DST_V1: case AMDGPU::SI_INDIRECT_DST_V2: case AMDGPU::SI_INDIRECT_DST_V4: case AMDGPU::SI_INDIRECT_DST_V8: case AMDGPU::SI_INDIRECT_DST_V16: + case AMDGPU::SI_INDIRECT_DST_V32: return emitIndirectDst(MI, *BB, *getSubtarget()); case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: case AMDGPU::SI_KILL_I1_PSEUDO: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td 
b/llvm/lib/Target/AMDGPU/SIInstructions.td --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -565,12 +565,14 @@ def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC; def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC; def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST; +def SI_INDIRECT_DST_V32 : SI_INDIRECT_DST; } // End Uses = [EXEC], Defs = [M0, EXEC] @@ -1192,8 +1194,8 @@ // 512-bit bitcast def : BitConvert ; def : BitConvert ; -def : BitConvert ; -def : BitConvert ; +def : BitConvert ; +def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; @@ -1206,6 +1208,17 @@ // 1024-bit bitcast def : BitConvert ; def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + /********** =================== **********/ /********** Src & Dst modifiers **********/ @@ -1581,11 +1594,13 @@ defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; //===----------------------------------------------------------------------===// // SAD Patterns diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -773,12 +773,12 @@ let isAllocatable = 0; } -def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, +def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32, v16i64, v16f64], 32, (add SGPR_1024Regs)> 
{ let AllocationPriority = 20; } -def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, +def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32, v16i64, v16f64], 32, (add SGPR_1024)> { let CopyCost = 16; let isAllocatable = 0; @@ -803,7 +803,7 @@ def VReg_192 : VRegClass<6, [untyped], (add VGPR_192)>; def VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64], (add VGPR_256)>; def VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>; -def VReg_1024 : VRegClass<32, [v32i32, v32f32], (add VGPR_1024)>; +def VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>; class ARegClass regTypes, dag regList> : VRegClass { @@ -819,7 +819,7 @@ def AReg_192 : ARegClass<6, [untyped], (add AGPR_192)>; def AReg_256 : ARegClass<8, [v8i32, v8f32, v4i64, v4f64], (add AGPR_256)>; def AReg_512 : ARegClass<16, [v16i32, v16f32, v8i64, v8f64], (add AGPR_512)>; -def AReg_1024 : ARegClass<32, [v32i32, v32f32], (add AGPR_1024)>; +def AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)>; } // End GeneratePressureSet = 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -1255,3 +1255,275 @@ store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef ret void } + +define amdgpu_ps float @dyn_extract_v16f32_v_s(<16 x float> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v16f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v16f32_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <16 x float> %vec, i32 
%sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v32f32_v_s(<32 x float> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v32f32_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v32f32_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <32 x float> %vec, i32 %sel + ret float %ext +} + +define amdgpu_ps double @dyn_extract_v16f64_v_s(<16 x double> %vec, i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v16f64_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v32 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v16f64_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 +; MOVREL-NEXT: v_movrels_b32_e32 v32, v0 +; MOVREL-NEXT: v_movrels_b32_e32 v0, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v32 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <16 x double> %vec, i32 %sel + ret double %ext +} + +define amdgpu_ps float @dyn_extract_v16f32_s_s(i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v16f32_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s4, 1.0 +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b32 s19, 0x41800000 +; GPRIDX-NEXT: s_mov_b32 s18, 0x41700000 +; GPRIDX-NEXT: s_mov_b32 s17, 0x41600000 +; GPRIDX-NEXT: s_mov_b32 s16, 0x41500000 +; GPRIDX-NEXT: 
s_mov_b32 s15, 0x41400000 +; GPRIDX-NEXT: s_mov_b32 s14, 0x41300000 +; GPRIDX-NEXT: s_mov_b32 s13, 0x41200000 +; GPRIDX-NEXT: s_mov_b32 s12, 0x41100000 +; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 +; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s7, 4.0 +; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s5, 2.0 +; GPRIDX-NEXT: s_movrels_b32 s0, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v16f32_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s4, 1.0 +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b32 s19, 0x41800000 +; MOVREL-NEXT: s_mov_b32 s18, 0x41700000 +; MOVREL-NEXT: s_mov_b32 s17, 0x41600000 +; MOVREL-NEXT: s_mov_b32 s16, 0x41500000 +; MOVREL-NEXT: s_mov_b32 s15, 0x41400000 +; MOVREL-NEXT: s_mov_b32 s14, 0x41300000 +; MOVREL-NEXT: s_mov_b32 s13, 0x41200000 +; MOVREL-NEXT: s_mov_b32 s12, 0x41100000 +; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 +; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 +; MOVREL-NEXT: s_mov_b32 s7, 4.0 +; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s5, 2.0 +; MOVREL-NEXT: s_movrels_b32 s0, s4 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <16 x float> , i32 %sel + ret float %ext +} + +define amdgpu_ps float @dyn_extract_v32f32_s_s(i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v32f32_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s36, 1.0 +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b32 s67, 0x42000000 +; GPRIDX-NEXT: s_mov_b32 s66, 0x41f80000 +; GPRIDX-NEXT: s_mov_b32 s65, 0x41f00000 +; GPRIDX-NEXT: s_mov_b32 s64, 0x41e80000 +; GPRIDX-NEXT: s_mov_b32 s63, 0x41e00000 +; GPRIDX-NEXT: s_mov_b32 s62, 0x41d80000 +; GPRIDX-NEXT: s_mov_b32 s61, 0x41d00000 +; 
GPRIDX-NEXT: s_mov_b32 s60, 0x41c80000 +; GPRIDX-NEXT: s_mov_b32 s59, 0x41c00000 +; GPRIDX-NEXT: s_mov_b32 s58, 0x41b80000 +; GPRIDX-NEXT: s_mov_b32 s57, 0x41b00000 +; GPRIDX-NEXT: s_mov_b32 s56, 0x41a80000 +; GPRIDX-NEXT: s_mov_b32 s55, 0x41a00000 +; GPRIDX-NEXT: s_mov_b32 s54, 0x41980000 +; GPRIDX-NEXT: s_mov_b32 s53, 0x41900000 +; GPRIDX-NEXT: s_mov_b32 s52, 0x41880000 +; GPRIDX-NEXT: s_mov_b32 s51, 0x41800000 +; GPRIDX-NEXT: s_mov_b32 s50, 0x41700000 +; GPRIDX-NEXT: s_mov_b32 s49, 0x41600000 +; GPRIDX-NEXT: s_mov_b32 s48, 0x41500000 +; GPRIDX-NEXT: s_mov_b32 s47, 0x41400000 +; GPRIDX-NEXT: s_mov_b32 s46, 0x41300000 +; GPRIDX-NEXT: s_mov_b32 s45, 0x41200000 +; GPRIDX-NEXT: s_mov_b32 s44, 0x41100000 +; GPRIDX-NEXT: s_mov_b32 s43, 0x41000000 +; GPRIDX-NEXT: s_mov_b32 s42, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s41, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s40, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s39, 4.0 +; GPRIDX-NEXT: s_mov_b32 s38, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s37, 2.0 +; GPRIDX-NEXT: s_movrels_b32 s0, s36 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v32f32_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s36, 1.0 +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b32 s67, 0x42000000 +; MOVREL-NEXT: s_mov_b32 s66, 0x41f80000 +; MOVREL-NEXT: s_mov_b32 s65, 0x41f00000 +; MOVREL-NEXT: s_mov_b32 s64, 0x41e80000 +; MOVREL-NEXT: s_mov_b32 s63, 0x41e00000 +; MOVREL-NEXT: s_mov_b32 s62, 0x41d80000 +; MOVREL-NEXT: s_mov_b32 s61, 0x41d00000 +; MOVREL-NEXT: s_mov_b32 s60, 0x41c80000 +; MOVREL-NEXT: s_mov_b32 s59, 0x41c00000 +; MOVREL-NEXT: s_mov_b32 s58, 0x41b80000 +; MOVREL-NEXT: s_mov_b32 s57, 0x41b00000 +; MOVREL-NEXT: s_mov_b32 s56, 0x41a80000 +; MOVREL-NEXT: s_mov_b32 s55, 0x41a00000 +; MOVREL-NEXT: s_mov_b32 s54, 0x41980000 +; MOVREL-NEXT: s_mov_b32 s53, 0x41900000 +; MOVREL-NEXT: s_mov_b32 s52, 0x41880000 +; MOVREL-NEXT: s_mov_b32 s51, 0x41800000 +; MOVREL-NEXT: s_mov_b32 s50, 
0x41700000 +; MOVREL-NEXT: s_mov_b32 s49, 0x41600000 +; MOVREL-NEXT: s_mov_b32 s48, 0x41500000 +; MOVREL-NEXT: s_mov_b32 s47, 0x41400000 +; MOVREL-NEXT: s_mov_b32 s46, 0x41300000 +; MOVREL-NEXT: s_mov_b32 s45, 0x41200000 +; MOVREL-NEXT: s_mov_b32 s44, 0x41100000 +; MOVREL-NEXT: s_mov_b32 s43, 0x41000000 +; MOVREL-NEXT: s_mov_b32 s42, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s41, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s40, 0x40a00000 +; MOVREL-NEXT: s_mov_b32 s39, 4.0 +; MOVREL-NEXT: s_mov_b32 s38, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s37, 2.0 +; MOVREL-NEXT: s_movrels_b32 s0, s36 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <32 x float> , i32 %sel + ret float %ext +} + +define amdgpu_ps double @dyn_extract_v16f64_s_s(i32 inreg %sel) { +; GPRIDX-LABEL: dyn_extract_v16f64_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s66, 0 +; GPRIDX-NEXT: s_mov_b64 s[36:37], 1.0 +; GPRIDX-NEXT: s_mov_b32 m0, s2 +; GPRIDX-NEXT: s_mov_b32 s67, 0x40300000 +; GPRIDX-NEXT: s_mov_b32 s65, 0x402e0000 +; GPRIDX-NEXT: s_mov_b32 s64, s66 +; GPRIDX-NEXT: s_mov_b32 s63, 0x402c0000 +; GPRIDX-NEXT: s_mov_b32 s62, s66 +; GPRIDX-NEXT: s_mov_b32 s61, 0x402a0000 +; GPRIDX-NEXT: s_mov_b32 s60, s66 +; GPRIDX-NEXT: s_mov_b32 s59, 0x40280000 +; GPRIDX-NEXT: s_mov_b32 s58, s66 +; GPRIDX-NEXT: s_mov_b32 s57, 0x40260000 +; GPRIDX-NEXT: s_mov_b32 s56, s66 +; GPRIDX-NEXT: s_mov_b32 s55, 0x40240000 +; GPRIDX-NEXT: s_mov_b32 s54, s66 +; GPRIDX-NEXT: s_mov_b32 s53, 0x40220000 +; GPRIDX-NEXT: s_mov_b32 s52, s66 +; GPRIDX-NEXT: s_mov_b32 s51, 0x40200000 +; GPRIDX-NEXT: s_mov_b32 s50, s66 +; GPRIDX-NEXT: s_mov_b32 s49, 0x401c0000 +; GPRIDX-NEXT: s_mov_b32 s48, s66 +; GPRIDX-NEXT: s_mov_b32 s47, 0x40180000 +; GPRIDX-NEXT: s_mov_b32 s46, s66 +; GPRIDX-NEXT: s_mov_b32 s45, 0x40140000 +; GPRIDX-NEXT: s_mov_b32 s44, s66 +; GPRIDX-NEXT: s_mov_b64 s[42:43], 4.0 +; GPRIDX-NEXT: s_mov_b32 s41, 0x40080000 +; GPRIDX-NEXT: s_mov_b32 s40, s66 +; 
GPRIDX-NEXT: s_mov_b64 s[38:39], 2.0 +; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[36:37] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_extract_v16f64_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s66, 0 +; MOVREL-NEXT: s_mov_b64 s[36:37], 1.0 +; MOVREL-NEXT: s_mov_b32 m0, s2 +; MOVREL-NEXT: s_mov_b32 s67, 0x40300000 +; MOVREL-NEXT: s_mov_b32 s65, 0x402e0000 +; MOVREL-NEXT: s_mov_b32 s64, s66 +; MOVREL-NEXT: s_mov_b32 s63, 0x402c0000 +; MOVREL-NEXT: s_mov_b32 s62, s66 +; MOVREL-NEXT: s_mov_b32 s61, 0x402a0000 +; MOVREL-NEXT: s_mov_b32 s60, s66 +; MOVREL-NEXT: s_mov_b32 s59, 0x40280000 +; MOVREL-NEXT: s_mov_b32 s58, s66 +; MOVREL-NEXT: s_mov_b32 s57, 0x40260000 +; MOVREL-NEXT: s_mov_b32 s56, s66 +; MOVREL-NEXT: s_mov_b32 s55, 0x40240000 +; MOVREL-NEXT: s_mov_b32 s54, s66 +; MOVREL-NEXT: s_mov_b32 s53, 0x40220000 +; MOVREL-NEXT: s_mov_b32 s52, s66 +; MOVREL-NEXT: s_mov_b32 s51, 0x40200000 +; MOVREL-NEXT: s_mov_b32 s50, s66 +; MOVREL-NEXT: s_mov_b32 s49, 0x401c0000 +; MOVREL-NEXT: s_mov_b32 s48, s66 +; MOVREL-NEXT: s_mov_b32 s47, 0x40180000 +; MOVREL-NEXT: s_mov_b32 s46, s66 +; MOVREL-NEXT: s_mov_b32 s45, 0x40140000 +; MOVREL-NEXT: s_mov_b32 s44, s66 +; MOVREL-NEXT: s_mov_b64 s[42:43], 4.0 +; MOVREL-NEXT: s_mov_b32 s41, 0x40080000 +; MOVREL-NEXT: s_mov_b32 s40, s66 +; MOVREL-NEXT: s_mov_b64 s[38:39], 2.0 +; MOVREL-NEXT: s_movrels_b64 s[0:1], s[36:37] +; MOVREL-NEXT: ; return to shader part epilog +entry: + %ext = extractelement <16 x double> , i32 %sel + ret double %ext +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -2179,3 +2179,1231 @@ store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef ret void } + +define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_s_s(<16 x i32> inreg %vec, i32 inreg %val, i32 inreg %idx) { +; 
GPRIDX-LABEL: dyn_insertelement_v16i32_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 m0, s19 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b32 s0, s18 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16i32_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s19 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movreld_b32 s0, s18 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx + ret <16 x i32> %insert +} + +define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_s_s(<16 x float> inreg %vec, float inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16f32_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: 
s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 m0, s19 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b32 s0, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16f32_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s19 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_movreld_b32 s0, s18 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: v_mov_b32_e32 v2, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s3 +; MOVREL-NEXT: 
v_mov_b32_e32 v4, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s15 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x float> %vec, float %val, i32 %idx + ret <16 x float> %insert +} + +define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_s_s(<32 x float> inreg %vec, float inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v32f32_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s17, s19 +; GPRIDX-NEXT: s_mov_b32 s18, s20 +; GPRIDX-NEXT: s_mov_b32 s19, s21 +; GPRIDX-NEXT: s_mov_b32 s20, s22 +; GPRIDX-NEXT: s_mov_b32 s21, s23 +; GPRIDX-NEXT: s_mov_b32 s22, s24 +; GPRIDX-NEXT: s_mov_b32 s23, s25 +; GPRIDX-NEXT: s_mov_b32 s24, s26 +; GPRIDX-NEXT: s_mov_b32 s25, s27 +; GPRIDX-NEXT: s_mov_b32 s26, s28 +; GPRIDX-NEXT: s_mov_b32 s27, s29 +; GPRIDX-NEXT: s_mov_b32 s28, s30 +; GPRIDX-NEXT: s_mov_b32 s29, s31 +; GPRIDX-NEXT: s_mov_b32 s31, s33 +; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: s_mov_b32 m0, s35 +; GPRIDX-NEXT: s_nop 0 +; 
GPRIDX-NEXT: s_movreld_b32 s0, s34 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v20, s20 +; GPRIDX-NEXT: v_mov_b32_e32 v21, s21 +; GPRIDX-NEXT: v_mov_b32_e32 v22, s22 +; GPRIDX-NEXT: v_mov_b32_e32 v23, s23 +; GPRIDX-NEXT: v_mov_b32_e32 v24, s24 +; GPRIDX-NEXT: v_mov_b32_e32 v25, s25 +; GPRIDX-NEXT: v_mov_b32_e32 v26, s26 +; GPRIDX-NEXT: v_mov_b32_e32 v27, s27 +; GPRIDX-NEXT: v_mov_b32_e32 v28, s28 +; GPRIDX-NEXT: v_mov_b32_e32 v29, s29 +; GPRIDX-NEXT: v_mov_b32_e32 v30, s30 +; GPRIDX-NEXT: v_mov_b32_e32 v31, s31 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v32f32_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 m0, s35 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s16, 
s18 +; MOVREL-NEXT: s_mov_b32 s17, s19 +; MOVREL-NEXT: s_mov_b32 s18, s20 +; MOVREL-NEXT: s_mov_b32 s19, s21 +; MOVREL-NEXT: s_mov_b32 s20, s22 +; MOVREL-NEXT: s_mov_b32 s21, s23 +; MOVREL-NEXT: s_mov_b32 s22, s24 +; MOVREL-NEXT: s_mov_b32 s23, s25 +; MOVREL-NEXT: s_mov_b32 s24, s26 +; MOVREL-NEXT: s_mov_b32 s25, s27 +; MOVREL-NEXT: s_mov_b32 s26, s28 +; MOVREL-NEXT: s_mov_b32 s27, s29 +; MOVREL-NEXT: s_mov_b32 s28, s30 +; MOVREL-NEXT: s_mov_b32 s29, s31 +; MOVREL-NEXT: s_mov_b32 s31, s33 +; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: s_movreld_b32 s0, s34 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: v_mov_b32_e32 v2, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s15 +; MOVREL-NEXT: v_mov_b32_e32 v16, s16 +; MOVREL-NEXT: v_mov_b32_e32 v17, s17 +; MOVREL-NEXT: v_mov_b32_e32 v18, s18 +; MOVREL-NEXT: v_mov_b32_e32 v19, s19 +; MOVREL-NEXT: v_mov_b32_e32 v20, s20 +; MOVREL-NEXT: v_mov_b32_e32 v21, s21 +; MOVREL-NEXT: v_mov_b32_e32 v22, s22 +; MOVREL-NEXT: v_mov_b32_e32 v23, s23 +; MOVREL-NEXT: v_mov_b32_e32 v24, s24 +; MOVREL-NEXT: v_mov_b32_e32 v25, s25 +; MOVREL-NEXT: v_mov_b32_e32 v26, s26 +; MOVREL-NEXT: v_mov_b32_e32 v27, s27 +; MOVREL-NEXT: v_mov_b32_e32 v28, s28 +; MOVREL-NEXT: v_mov_b32_e32 v29, s29 +; MOVREL-NEXT: v_mov_b32_e32 v30, s30 +; MOVREL-NEXT: v_mov_b32_e32 v31, s31 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <32 x float> %vec, float %val, i32 %idx + ret <32 x float> %insert +} + +define 
amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_s_s(<16 x i64> inreg %vec, i64 inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16i64_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s17, s19 +; GPRIDX-NEXT: s_mov_b32 s18, s20 +; GPRIDX-NEXT: s_mov_b32 s19, s21 +; GPRIDX-NEXT: s_mov_b32 s20, s22 +; GPRIDX-NEXT: s_mov_b32 s21, s23 +; GPRIDX-NEXT: s_mov_b32 s22, s24 +; GPRIDX-NEXT: s_mov_b32 s23, s25 +; GPRIDX-NEXT: s_mov_b32 s24, s26 +; GPRIDX-NEXT: s_mov_b32 s25, s27 +; GPRIDX-NEXT: s_mov_b32 s26, s28 +; GPRIDX-NEXT: s_mov_b32 s27, s29 +; GPRIDX-NEXT: s_mov_b32 s28, s30 +; GPRIDX-NEXT: s_mov_b32 s29, s31 +; GPRIDX-NEXT: s_mov_b32 s31, s33 +; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: s_mov_b32 m0, s36 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16i64_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s36 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; 
MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s16, s18 +; MOVREL-NEXT: s_mov_b32 s17, s19 +; MOVREL-NEXT: s_mov_b32 s18, s20 +; MOVREL-NEXT: s_mov_b32 s19, s21 +; MOVREL-NEXT: s_mov_b32 s20, s22 +; MOVREL-NEXT: s_mov_b32 s21, s23 +; MOVREL-NEXT: s_mov_b32 s22, s24 +; MOVREL-NEXT: s_mov_b32 s23, s25 +; MOVREL-NEXT: s_mov_b32 s24, s26 +; MOVREL-NEXT: s_mov_b32 s25, s27 +; MOVREL-NEXT: s_mov_b32 s26, s28 +; MOVREL-NEXT: s_mov_b32 s27, s29 +; MOVREL-NEXT: s_mov_b32 s28, s30 +; MOVREL-NEXT: s_mov_b32 s29, s31 +; MOVREL-NEXT: s_mov_b32 s31, s33 +; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: s_movreld_b64 s[0:1], s[34:35] +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx + ret <16 x i64> %insert +} + +define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_s_s(<16 x double> inreg %vec, double inreg %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16f64_s_s_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s17, s19 +; GPRIDX-NEXT: s_mov_b32 s18, s20 +; GPRIDX-NEXT: s_mov_b32 s19, s21 +; GPRIDX-NEXT: s_mov_b32 s20, s22 +; GPRIDX-NEXT: s_mov_b32 s21, s23 +; GPRIDX-NEXT: s_mov_b32 s22, s24 +; GPRIDX-NEXT: s_mov_b32 s23, s25 +; GPRIDX-NEXT: s_mov_b32 s24, s26 +; GPRIDX-NEXT: s_mov_b32 s25, s27 +; 
GPRIDX-NEXT: s_mov_b32 s26, s28 +; GPRIDX-NEXT: s_mov_b32 s27, s29 +; GPRIDX-NEXT: s_mov_b32 s28, s30 +; GPRIDX-NEXT: s_mov_b32 s29, s31 +; GPRIDX-NEXT: s_mov_b32 s31, s33 +; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: s_mov_b32 m0, s36 +; GPRIDX-NEXT: s_nop 0 +; GPRIDX-NEXT: s_movreld_b64 s[0:1], s[34:35] +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16f64_s_s_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 m0, s36 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s16, s18 +; MOVREL-NEXT: s_mov_b32 s17, s19 +; MOVREL-NEXT: s_mov_b32 s18, s20 +; MOVREL-NEXT: s_mov_b32 s19, s21 +; MOVREL-NEXT: s_mov_b32 s20, s22 +; MOVREL-NEXT: s_mov_b32 s21, s23 +; MOVREL-NEXT: s_mov_b32 s22, s24 +; MOVREL-NEXT: s_mov_b32 s23, s25 +; MOVREL-NEXT: s_mov_b32 s24, s26 +; MOVREL-NEXT: s_mov_b32 s25, s27 +; MOVREL-NEXT: s_mov_b32 s26, s28 +; MOVREL-NEXT: s_mov_b32 s27, s29 +; MOVREL-NEXT: s_mov_b32 s28, s30 +; MOVREL-NEXT: s_mov_b32 s29, s31 +; MOVREL-NEXT: s_mov_b32 s31, s33 +; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: s_movreld_b64 s[0:1], s[34:35] +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x double> %vec, double %val, i32 %idx + ret <16 x double> %insert +} + +define amdgpu_ps <16 x i32> @dyn_insertelement_v16i32_s_v_s(<16 x i32> inreg %vec, i32 %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16i32_s_v_s: +; GPRIDX: ; %bb.0: 
; %entry +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 +; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v1, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v1 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v13 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s14, v15 +; GPRIDX-NEXT: v_readfirstlane_b32 s15, v16 +; GPRIDX-NEXT: ; 
return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16i32_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: v_mov_b32_e32 v16, s15 +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: v_mov_b32_e32 v1, s0 +; MOVREL-NEXT: v_mov_b32_e32 v14, s13 +; MOVREL-NEXT: v_mov_b32_e32 v15, s14 +; MOVREL-NEXT: v_mov_b32_e32 v13, s12 +; MOVREL-NEXT: v_mov_b32_e32 v12, s11 +; MOVREL-NEXT: v_mov_b32_e32 v11, s10 +; MOVREL-NEXT: v_mov_b32_e32 v10, s9 +; MOVREL-NEXT: v_mov_b32_e32 v9, s8 +; MOVREL-NEXT: v_mov_b32_e32 v8, s7 +; MOVREL-NEXT: v_mov_b32_e32 v7, s6 +; MOVREL-NEXT: v_mov_b32_e32 v6, s5 +; MOVREL-NEXT: v_mov_b32_e32 v5, s4 +; MOVREL-NEXT: v_mov_b32_e32 v4, s3 +; MOVREL-NEXT: v_mov_b32_e32 v3, s2 +; MOVREL-NEXT: v_mov_b32_e32 v2, s1 +; MOVREL-NEXT: v_movreld_b32_e32 v1, v0 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v13 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s14, v15 +; 
MOVREL-NEXT: v_readfirstlane_b32 s15, v16 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x i32> %vec, i32 %val, i32 %idx + ret <16 x i32> %insert +} + +define amdgpu_ps <16 x float> @dyn_insertelement_v16f32_s_v_s(<16 x float> inreg %vec, float %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16f32_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 +; GPRIDX-NEXT: s_set_gpr_idx_on s18, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v16 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16f32_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; 
MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: v_mov_b32_e32 v16, v0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: s_mov_b32 m0, s18 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: v_mov_b32_e32 v2, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s15 +; MOVREL-NEXT: v_movreld_b32_e32 v0, v16 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x float> %vec, float %val, i32 %idx + ret <16 x float> %insert +} + +define amdgpu_ps <32 x float> @dyn_insertelement_v32f32_s_v_s(<32 x float> inreg %vec, float %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v32f32_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; 
GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s17, s19 +; GPRIDX-NEXT: s_mov_b32 s18, s20 +; GPRIDX-NEXT: s_mov_b32 s19, s21 +; GPRIDX-NEXT: s_mov_b32 s20, s22 +; GPRIDX-NEXT: s_mov_b32 s21, s23 +; GPRIDX-NEXT: s_mov_b32 s22, s24 +; GPRIDX-NEXT: s_mov_b32 s23, s25 +; GPRIDX-NEXT: s_mov_b32 s24, s26 +; GPRIDX-NEXT: s_mov_b32 s25, s27 +; GPRIDX-NEXT: s_mov_b32 s26, s28 +; GPRIDX-NEXT: s_mov_b32 s27, s29 +; GPRIDX-NEXT: s_mov_b32 s28, s30 +; GPRIDX-NEXT: s_mov_b32 s29, s31 +; GPRIDX-NEXT: s_mov_b32 s31, s33 +; GPRIDX-NEXT: v_mov_b32_e32 v32, v0 +; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v20, s20 +; GPRIDX-NEXT: v_mov_b32_e32 v21, s21 +; GPRIDX-NEXT: v_mov_b32_e32 v22, s22 +; GPRIDX-NEXT: v_mov_b32_e32 v23, s23 +; GPRIDX-NEXT: v_mov_b32_e32 v24, s24 +; GPRIDX-NEXT: v_mov_b32_e32 v25, s25 +; GPRIDX-NEXT: v_mov_b32_e32 v26, s26 +; GPRIDX-NEXT: v_mov_b32_e32 v27, s27 +; GPRIDX-NEXT: v_mov_b32_e32 v28, s28 +; GPRIDX-NEXT: v_mov_b32_e32 v29, s29 +; GPRIDX-NEXT: v_mov_b32_e32 v30, s30 +; GPRIDX-NEXT: v_mov_b32_e32 v31, s31 +; GPRIDX-NEXT: s_set_gpr_idx_on s34, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v0, v32 +; GPRIDX-NEXT: 
s_set_gpr_idx_off +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v32f32_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s16, s18 +; MOVREL-NEXT: s_mov_b32 s17, s19 +; MOVREL-NEXT: s_mov_b32 s18, s20 +; MOVREL-NEXT: s_mov_b32 s19, s21 +; MOVREL-NEXT: s_mov_b32 s20, s22 +; MOVREL-NEXT: s_mov_b32 s21, s23 +; MOVREL-NEXT: s_mov_b32 s22, s24 +; MOVREL-NEXT: s_mov_b32 s23, s25 +; MOVREL-NEXT: s_mov_b32 s24, s26 +; MOVREL-NEXT: s_mov_b32 s25, s27 +; MOVREL-NEXT: s_mov_b32 s26, s28 +; MOVREL-NEXT: s_mov_b32 s27, s29 +; MOVREL-NEXT: s_mov_b32 s28, s30 +; MOVREL-NEXT: s_mov_b32 s29, s31 +; MOVREL-NEXT: s_mov_b32 s31, s33 +; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: v_mov_b32_e32 v32, v0 +; MOVREL-NEXT: v_mov_b32_e32 v0, s0 +; MOVREL-NEXT: s_mov_b32 m0, s34 +; MOVREL-NEXT: v_mov_b32_e32 v1, s1 +; MOVREL-NEXT: v_mov_b32_e32 v2, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s15 +; MOVREL-NEXT: v_mov_b32_e32 v16, s16 +; MOVREL-NEXT: v_mov_b32_e32 v17, 
s17 +; MOVREL-NEXT: v_mov_b32_e32 v18, s18 +; MOVREL-NEXT: v_mov_b32_e32 v19, s19 +; MOVREL-NEXT: v_mov_b32_e32 v20, s20 +; MOVREL-NEXT: v_mov_b32_e32 v21, s21 +; MOVREL-NEXT: v_mov_b32_e32 v22, s22 +; MOVREL-NEXT: v_mov_b32_e32 v23, s23 +; MOVREL-NEXT: v_mov_b32_e32 v24, s24 +; MOVREL-NEXT: v_mov_b32_e32 v25, s25 +; MOVREL-NEXT: v_mov_b32_e32 v26, s26 +; MOVREL-NEXT: v_mov_b32_e32 v27, s27 +; MOVREL-NEXT: v_mov_b32_e32 v28, s28 +; MOVREL-NEXT: v_mov_b32_e32 v29, s29 +; MOVREL-NEXT: v_mov_b32_e32 v30, s30 +; MOVREL-NEXT: v_mov_b32_e32 v31, s31 +; MOVREL-NEXT: v_movreld_b32_e32 v0, v32 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <32 x float> %vec, float %val, i32 %idx + ret <32 x float> %insert +} + +define amdgpu_ps <16 x i64> @dyn_insertelement_v16i64_s_v_s(<16 x i64> inreg %vec, i64 %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16i64_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s17, s19 +; GPRIDX-NEXT: s_mov_b32 s19, s21 +; GPRIDX-NEXT: s_mov_b32 s21, s23 +; GPRIDX-NEXT: s_mov_b32 s23, s25 +; GPRIDX-NEXT: s_mov_b32 s25, s27 +; GPRIDX-NEXT: s_mov_b32 s27, s29 +; GPRIDX-NEXT: s_mov_b32 s29, s31 +; GPRIDX-NEXT: s_mov_b32 s31, s33 +; GPRIDX-NEXT: s_mov_b32 s0, s2 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s18, s20 +; GPRIDX-NEXT: s_mov_b32 s20, s22 +; GPRIDX-NEXT: s_mov_b32 s22, s24 +; GPRIDX-NEXT: s_mov_b32 s24, s26 +; GPRIDX-NEXT: s_mov_b32 
s26, s28 +; GPRIDX-NEXT: s_mov_b32 s28, s30 +; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: v_mov_b32_e32 v33, s31 +; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1 +; GPRIDX-NEXT: v_mov_b32_e32 v32, s30 +; GPRIDX-NEXT: v_mov_b32_e32 v31, s29 +; GPRIDX-NEXT: v_mov_b32_e32 v30, s28 +; GPRIDX-NEXT: v_mov_b32_e32 v29, s27 +; GPRIDX-NEXT: v_mov_b32_e32 v28, s26 +; GPRIDX-NEXT: v_mov_b32_e32 v27, s25 +; GPRIDX-NEXT: v_mov_b32_e32 v26, s24 +; GPRIDX-NEXT: v_mov_b32_e32 v25, s23 +; GPRIDX-NEXT: v_mov_b32_e32 v24, s22 +; GPRIDX-NEXT: v_mov_b32_e32 v23, s21 +; GPRIDX-NEXT: v_mov_b32_e32 v22, s20 +; GPRIDX-NEXT: v_mov_b32_e32 v21, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v20, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v19, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 +; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 +; GPRIDX-NEXT: 
v_readfirstlane_b32 s9, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 +; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16 +; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17 +; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18 +; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19 +; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20 +; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21 +; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22 +; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23 +; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24 +; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25 +; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26 +; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27 +; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28 +; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29 +; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30 +; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31 +; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32 +; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16i64_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s17, s19 +; MOVREL-NEXT: s_mov_b32 s19, s21 +; MOVREL-NEXT: s_mov_b32 s21, s23 +; MOVREL-NEXT: s_mov_b32 s23, s25 +; MOVREL-NEXT: s_mov_b32 s25, s27 +; MOVREL-NEXT: s_mov_b32 s27, s29 +; MOVREL-NEXT: s_mov_b32 s29, s31 +; MOVREL-NEXT: s_mov_b32 s31, s33 +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s16, 
s18 +; MOVREL-NEXT: s_mov_b32 s18, s20 +; MOVREL-NEXT: s_mov_b32 s20, s22 +; MOVREL-NEXT: s_mov_b32 s22, s24 +; MOVREL-NEXT: s_mov_b32 s24, s26 +; MOVREL-NEXT: s_mov_b32 s26, s28 +; MOVREL-NEXT: s_mov_b32 s28, s30 +; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: v_mov_b32_e32 v33, s31 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: s_lshl_b32 m0, s34, 1 +; MOVREL-NEXT: v_mov_b32_e32 v31, s29 +; MOVREL-NEXT: v_mov_b32_e32 v32, s30 +; MOVREL-NEXT: v_mov_b32_e32 v30, s28 +; MOVREL-NEXT: v_mov_b32_e32 v29, s27 +; MOVREL-NEXT: v_mov_b32_e32 v28, s26 +; MOVREL-NEXT: v_mov_b32_e32 v27, s25 +; MOVREL-NEXT: v_mov_b32_e32 v26, s24 +; MOVREL-NEXT: v_mov_b32_e32 v25, s23 +; MOVREL-NEXT: v_mov_b32_e32 v24, s22 +; MOVREL-NEXT: v_mov_b32_e32 v23, s21 +; MOVREL-NEXT: v_mov_b32_e32 v22, s20 +; MOVREL-NEXT: v_mov_b32_e32 v21, s19 +; MOVREL-NEXT: v_mov_b32_e32 v20, s18 +; MOVREL-NEXT: v_mov_b32_e32 v19, s17 +; MOVREL-NEXT: v_mov_b32_e32 v18, s16 +; MOVREL-NEXT: v_mov_b32_e32 v17, s15 +; MOVREL-NEXT: v_mov_b32_e32 v16, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s1 +; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 +; MOVREL-NEXT: v_movreld_b32_e32 v3, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v10 +; 
MOVREL-NEXT: v_readfirstlane_b32 s9, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v15 +; MOVREL-NEXT: v_readfirstlane_b32 s14, v16 +; MOVREL-NEXT: v_readfirstlane_b32 s15, v17 +; MOVREL-NEXT: v_readfirstlane_b32 s16, v18 +; MOVREL-NEXT: v_readfirstlane_b32 s17, v19 +; MOVREL-NEXT: v_readfirstlane_b32 s18, v20 +; MOVREL-NEXT: v_readfirstlane_b32 s19, v21 +; MOVREL-NEXT: v_readfirstlane_b32 s20, v22 +; MOVREL-NEXT: v_readfirstlane_b32 s21, v23 +; MOVREL-NEXT: v_readfirstlane_b32 s22, v24 +; MOVREL-NEXT: v_readfirstlane_b32 s23, v25 +; MOVREL-NEXT: v_readfirstlane_b32 s24, v26 +; MOVREL-NEXT: v_readfirstlane_b32 s25, v27 +; MOVREL-NEXT: v_readfirstlane_b32 s26, v28 +; MOVREL-NEXT: v_readfirstlane_b32 s27, v29 +; MOVREL-NEXT: v_readfirstlane_b32 s28, v30 +; MOVREL-NEXT: v_readfirstlane_b32 s29, v31 +; MOVREL-NEXT: v_readfirstlane_b32 s30, v32 +; MOVREL-NEXT: v_readfirstlane_b32 s31, v33 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x i64> %vec, i64 %val, i32 %idx + ret <16 x i64> %insert +} + +define amdgpu_ps <16 x double> @dyn_insertelement_v16f64_s_v_s(<16 x double> inreg %vec, double %val, i32 inreg %idx) { +; GPRIDX-LABEL: dyn_insertelement_v16f64_s_v_s: +; GPRIDX: ; %bb.0: ; %entry +; GPRIDX-NEXT: s_mov_b32 s1, s3 +; GPRIDX-NEXT: s_mov_b32 s3, s5 +; GPRIDX-NEXT: s_mov_b32 s5, s7 +; GPRIDX-NEXT: s_mov_b32 s7, s9 +; GPRIDX-NEXT: s_mov_b32 s9, s11 +; GPRIDX-NEXT: s_mov_b32 s11, s13 +; GPRIDX-NEXT: s_mov_b32 s13, s15 +; GPRIDX-NEXT: s_mov_b32 s15, s17 +; GPRIDX-NEXT: s_mov_b32 s17, s19 +; GPRIDX-NEXT: s_mov_b32 s19, s21 +; GPRIDX-NEXT: s_mov_b32 s21, s23 +; GPRIDX-NEXT: s_mov_b32 s23, s25 +; GPRIDX-NEXT: s_mov_b32 s25, s27 +; GPRIDX-NEXT: s_mov_b32 s27, s29 +; GPRIDX-NEXT: s_mov_b32 s29, s31 +; GPRIDX-NEXT: s_mov_b32 s31, s33 +; GPRIDX-NEXT: s_mov_b32 s0, 
s2 +; GPRIDX-NEXT: s_mov_b32 s2, s4 +; GPRIDX-NEXT: s_mov_b32 s4, s6 +; GPRIDX-NEXT: s_mov_b32 s6, s8 +; GPRIDX-NEXT: s_mov_b32 s8, s10 +; GPRIDX-NEXT: s_mov_b32 s10, s12 +; GPRIDX-NEXT: s_mov_b32 s12, s14 +; GPRIDX-NEXT: s_mov_b32 s14, s16 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s18, s20 +; GPRIDX-NEXT: s_mov_b32 s20, s22 +; GPRIDX-NEXT: s_mov_b32 s22, s24 +; GPRIDX-NEXT: s_mov_b32 s24, s26 +; GPRIDX-NEXT: s_mov_b32 s26, s28 +; GPRIDX-NEXT: s_mov_b32 s28, s30 +; GPRIDX-NEXT: s_mov_b32 s30, s32 +; GPRIDX-NEXT: v_mov_b32_e32 v33, s31 +; GPRIDX-NEXT: s_lshl_b32 s33, s34, 1 +; GPRIDX-NEXT: v_mov_b32_e32 v32, s30 +; GPRIDX-NEXT: v_mov_b32_e32 v31, s29 +; GPRIDX-NEXT: v_mov_b32_e32 v30, s28 +; GPRIDX-NEXT: v_mov_b32_e32 v29, s27 +; GPRIDX-NEXT: v_mov_b32_e32 v28, s26 +; GPRIDX-NEXT: v_mov_b32_e32 v27, s25 +; GPRIDX-NEXT: v_mov_b32_e32 v26, s24 +; GPRIDX-NEXT: v_mov_b32_e32 v25, s23 +; GPRIDX-NEXT: v_mov_b32_e32 v24, s22 +; GPRIDX-NEXT: v_mov_b32_e32 v23, s21 +; GPRIDX-NEXT: v_mov_b32_e32 v22, s20 +; GPRIDX-NEXT: v_mov_b32_e32 v21, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v20, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v19, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 +; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v2, v0 +; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: s_set_gpr_idx_on s33, gpr_idx(DST) +; GPRIDX-NEXT: v_mov_b32_e32 v3, v1 
+; GPRIDX-NEXT: s_set_gpr_idx_off +; GPRIDX-NEXT: v_readfirstlane_b32 s0, v2 +; GPRIDX-NEXT: v_readfirstlane_b32 s1, v3 +; GPRIDX-NEXT: v_readfirstlane_b32 s2, v4 +; GPRIDX-NEXT: v_readfirstlane_b32 s3, v5 +; GPRIDX-NEXT: v_readfirstlane_b32 s4, v6 +; GPRIDX-NEXT: v_readfirstlane_b32 s5, v7 +; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 +; GPRIDX-NEXT: v_readfirstlane_b32 s7, v9 +; GPRIDX-NEXT: v_readfirstlane_b32 s8, v10 +; GPRIDX-NEXT: v_readfirstlane_b32 s9, v11 +; GPRIDX-NEXT: v_readfirstlane_b32 s10, v12 +; GPRIDX-NEXT: v_readfirstlane_b32 s11, v13 +; GPRIDX-NEXT: v_readfirstlane_b32 s12, v14 +; GPRIDX-NEXT: v_readfirstlane_b32 s13, v15 +; GPRIDX-NEXT: v_readfirstlane_b32 s14, v16 +; GPRIDX-NEXT: v_readfirstlane_b32 s15, v17 +; GPRIDX-NEXT: v_readfirstlane_b32 s16, v18 +; GPRIDX-NEXT: v_readfirstlane_b32 s17, v19 +; GPRIDX-NEXT: v_readfirstlane_b32 s18, v20 +; GPRIDX-NEXT: v_readfirstlane_b32 s19, v21 +; GPRIDX-NEXT: v_readfirstlane_b32 s20, v22 +; GPRIDX-NEXT: v_readfirstlane_b32 s21, v23 +; GPRIDX-NEXT: v_readfirstlane_b32 s22, v24 +; GPRIDX-NEXT: v_readfirstlane_b32 s23, v25 +; GPRIDX-NEXT: v_readfirstlane_b32 s24, v26 +; GPRIDX-NEXT: v_readfirstlane_b32 s25, v27 +; GPRIDX-NEXT: v_readfirstlane_b32 s26, v28 +; GPRIDX-NEXT: v_readfirstlane_b32 s27, v29 +; GPRIDX-NEXT: v_readfirstlane_b32 s28, v30 +; GPRIDX-NEXT: v_readfirstlane_b32 s29, v31 +; GPRIDX-NEXT: v_readfirstlane_b32 s30, v32 +; GPRIDX-NEXT: v_readfirstlane_b32 s31, v33 +; GPRIDX-NEXT: ; return to shader part epilog +; +; MOVREL-LABEL: dyn_insertelement_v16f64_s_v_s: +; MOVREL: ; %bb.0: ; %entry +; MOVREL-NEXT: s_mov_b32 s1, s3 +; MOVREL-NEXT: s_mov_b32 s3, s5 +; MOVREL-NEXT: s_mov_b32 s5, s7 +; MOVREL-NEXT: s_mov_b32 s7, s9 +; MOVREL-NEXT: s_mov_b32 s9, s11 +; MOVREL-NEXT: s_mov_b32 s11, s13 +; MOVREL-NEXT: s_mov_b32 s13, s15 +; MOVREL-NEXT: s_mov_b32 s15, s17 +; MOVREL-NEXT: s_mov_b32 s17, s19 +; MOVREL-NEXT: s_mov_b32 s19, s21 +; MOVREL-NEXT: s_mov_b32 s21, s23 +; MOVREL-NEXT: s_mov_b32 s23, s25 +; 
MOVREL-NEXT: s_mov_b32 s25, s27 +; MOVREL-NEXT: s_mov_b32 s27, s29 +; MOVREL-NEXT: s_mov_b32 s29, s31 +; MOVREL-NEXT: s_mov_b32 s31, s33 +; MOVREL-NEXT: s_mov_b32 s0, s2 +; MOVREL-NEXT: s_mov_b32 s2, s4 +; MOVREL-NEXT: s_mov_b32 s4, s6 +; MOVREL-NEXT: s_mov_b32 s6, s8 +; MOVREL-NEXT: s_mov_b32 s8, s10 +; MOVREL-NEXT: s_mov_b32 s10, s12 +; MOVREL-NEXT: s_mov_b32 s12, s14 +; MOVREL-NEXT: s_mov_b32 s14, s16 +; MOVREL-NEXT: s_mov_b32 s16, s18 +; MOVREL-NEXT: s_mov_b32 s18, s20 +; MOVREL-NEXT: s_mov_b32 s20, s22 +; MOVREL-NEXT: s_mov_b32 s22, s24 +; MOVREL-NEXT: s_mov_b32 s24, s26 +; MOVREL-NEXT: s_mov_b32 s26, s28 +; MOVREL-NEXT: s_mov_b32 s28, s30 +; MOVREL-NEXT: s_mov_b32 s30, s32 +; MOVREL-NEXT: v_mov_b32_e32 v33, s31 +; MOVREL-NEXT: v_mov_b32_e32 v2, s0 +; MOVREL-NEXT: s_lshl_b32 m0, s34, 1 +; MOVREL-NEXT: v_mov_b32_e32 v31, s29 +; MOVREL-NEXT: v_mov_b32_e32 v32, s30 +; MOVREL-NEXT: v_mov_b32_e32 v30, s28 +; MOVREL-NEXT: v_mov_b32_e32 v29, s27 +; MOVREL-NEXT: v_mov_b32_e32 v28, s26 +; MOVREL-NEXT: v_mov_b32_e32 v27, s25 +; MOVREL-NEXT: v_mov_b32_e32 v26, s24 +; MOVREL-NEXT: v_mov_b32_e32 v25, s23 +; MOVREL-NEXT: v_mov_b32_e32 v24, s22 +; MOVREL-NEXT: v_mov_b32_e32 v23, s21 +; MOVREL-NEXT: v_mov_b32_e32 v22, s20 +; MOVREL-NEXT: v_mov_b32_e32 v21, s19 +; MOVREL-NEXT: v_mov_b32_e32 v20, s18 +; MOVREL-NEXT: v_mov_b32_e32 v19, s17 +; MOVREL-NEXT: v_mov_b32_e32 v18, s16 +; MOVREL-NEXT: v_mov_b32_e32 v17, s15 +; MOVREL-NEXT: v_mov_b32_e32 v16, s14 +; MOVREL-NEXT: v_mov_b32_e32 v15, s13 +; MOVREL-NEXT: v_mov_b32_e32 v14, s12 +; MOVREL-NEXT: v_mov_b32_e32 v13, s11 +; MOVREL-NEXT: v_mov_b32_e32 v12, s10 +; MOVREL-NEXT: v_mov_b32_e32 v11, s9 +; MOVREL-NEXT: v_mov_b32_e32 v10, s8 +; MOVREL-NEXT: v_mov_b32_e32 v9, s7 +; MOVREL-NEXT: v_mov_b32_e32 v8, s6 +; MOVREL-NEXT: v_mov_b32_e32 v7, s5 +; MOVREL-NEXT: v_mov_b32_e32 v6, s4 +; MOVREL-NEXT: v_mov_b32_e32 v5, s3 +; MOVREL-NEXT: v_mov_b32_e32 v4, s2 +; MOVREL-NEXT: v_mov_b32_e32 v3, s1 +; MOVREL-NEXT: v_movreld_b32_e32 v2, v0 +; 
MOVREL-NEXT: v_movreld_b32_e32 v3, v1 +; MOVREL-NEXT: v_readfirstlane_b32 s0, v2 +; MOVREL-NEXT: v_readfirstlane_b32 s1, v3 +; MOVREL-NEXT: v_readfirstlane_b32 s2, v4 +; MOVREL-NEXT: v_readfirstlane_b32 s3, v5 +; MOVREL-NEXT: v_readfirstlane_b32 s4, v6 +; MOVREL-NEXT: v_readfirstlane_b32 s5, v7 +; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 +; MOVREL-NEXT: v_readfirstlane_b32 s7, v9 +; MOVREL-NEXT: v_readfirstlane_b32 s8, v10 +; MOVREL-NEXT: v_readfirstlane_b32 s9, v11 +; MOVREL-NEXT: v_readfirstlane_b32 s10, v12 +; MOVREL-NEXT: v_readfirstlane_b32 s11, v13 +; MOVREL-NEXT: v_readfirstlane_b32 s12, v14 +; MOVREL-NEXT: v_readfirstlane_b32 s13, v15 +; MOVREL-NEXT: v_readfirstlane_b32 s14, v16 +; MOVREL-NEXT: v_readfirstlane_b32 s15, v17 +; MOVREL-NEXT: v_readfirstlane_b32 s16, v18 +; MOVREL-NEXT: v_readfirstlane_b32 s17, v19 +; MOVREL-NEXT: v_readfirstlane_b32 s18, v20 +; MOVREL-NEXT: v_readfirstlane_b32 s19, v21 +; MOVREL-NEXT: v_readfirstlane_b32 s20, v22 +; MOVREL-NEXT: v_readfirstlane_b32 s21, v23 +; MOVREL-NEXT: v_readfirstlane_b32 s22, v24 +; MOVREL-NEXT: v_readfirstlane_b32 s23, v25 +; MOVREL-NEXT: v_readfirstlane_b32 s24, v26 +; MOVREL-NEXT: v_readfirstlane_b32 s25, v27 +; MOVREL-NEXT: v_readfirstlane_b32 s26, v28 +; MOVREL-NEXT: v_readfirstlane_b32 s27, v29 +; MOVREL-NEXT: v_readfirstlane_b32 s28, v30 +; MOVREL-NEXT: v_readfirstlane_b32 s29, v31 +; MOVREL-NEXT: v_readfirstlane_b32 s30, v32 +; MOVREL-NEXT: v_readfirstlane_b32 s31, v33 +; MOVREL-NEXT: ; implicit-def: $vcc_hi +; MOVREL-NEXT: ; return to shader part epilog +entry: + %insert = insertelement <16 x double> %vec, double %val, i32 %idx + ret <16 x double> %insert +} diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -220,42 +220,31 @@ ret void } +; GCN-LABEL: {{^}}double15_extelt: +; GCN-NOT: buffer_ +; GCN-NOT: s_or_b32 +; 
GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] +; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] +; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] +; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]] +; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]] +define amdgpu_kernel void @double15_extelt(double addrspace(1)* %out, i32 %sel) { +entry: + %ext = extractelement <15 x double> , i32 %sel + store double %ext, double addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}double16_extelt: -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: store_dword +; GCN-NOT: buffer_ +; GCN-NOT: s_or_b32 +; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0 +; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] +; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] +; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] +; GCN-DAG: v_movrels_b32_e32 v[[RES_HI:[0-9]+]], v[[#BASE+1]] +; GCN: store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RES_LO]]:[[RES_HI]]] define amdgpu_kernel void @double16_extelt(double addrspace(1)* %out, i32 %sel) { entry: %ext = extractelement <16 x double> , i32 %sel @@ 
-263,6 +252,50 @@ ret void } +; GCN-LABEL: {{^}}float32_extelt: +; GCN-NOT: buffer_ +; GCN-DAG: s_mov_b32 m0, +; GCN-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], 1.0 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40a00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40c00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40e00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41000000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41100000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41200000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41300000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41400000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41500000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41600000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41700000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41800000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41880000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41980000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b80000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41c80000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41d80000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41e80000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f00000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41f80000 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x42000000 +; GCN-DAG: v_movrels_b32_e32 [[RES:v[0-9]+]], [[VLO]] +; GCN: store_dword v[{{[0-9:]+}}], [[RES]] +define amdgpu_kernel void @float32_extelt(float addrspace(1)* %out, i32 %sel) { +entry: + %ext = extractelement <32 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0, float 17.0, float 18.0, float 19.0, float 20.0, float 21.0, float 22.0, float 23.0, float 24.0, float 25.0, float 26.0, float 27.0, float 28.0, float 29.0, float 30.0, float 31.0, float 32.0>, i32 %sel + store float %ext, float addrspace(1)* %out + ret void +} + +;
GCN-LABEL: {{^}}byte8_extelt: ; GCN-NOT: buffer_ ; GCN-DAG: s_mov_b32 s[[SL:[0-9]+]], 0x4030201 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -106,6 +106,15 @@ ret void } +; GCN-LABEL: {{^}}float32_inselt: +; GCN: v_movreld_b32 +define amdgpu_kernel void @float32_inselt(<32 x float> addrspace(1)* %out, <32 x float> %vec, i32 %sel) { +entry: + %v = insertelement <32 x float> %vec, float 1.000000e+00, i32 %sel + store <32 x float> %v, <32 x float> addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}half4_inselt: ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_movrel @@ -298,6 +307,36 @@ ret void } +; GCN-LABEL: {{^}}double16_inselt: +; GCN-NOT: v_cndmask +; GCN-NOT: buffer_ +; GCN-NOT: s_or_b32 +; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] +; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0 +; GCN-NOT: s_mov_b32 m0 +; GCN: v_movreld_b32_e32 v[[#BASE+1]], +define amdgpu_kernel void @double16_inselt(<16 x double> addrspace(1)* %out, <16 x double> %vec, i32 %sel) { +entry: + %v = insertelement <16 x double> %vec, double 1.000000e+00, i32 %sel + store <16 x double> %v, <16 x double> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}double15_inselt: +; GCN-NOT: v_cndmask +; GCN-NOT: buffer_ +; GCN-NOT: s_or_b32 +; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] +; GCN-DAG: v_movreld_b32_e32 v[[#BASE:]], 0 +; GCN-NOT: s_mov_b32 m0 +; GCN: v_movreld_b32_e32 v[[#BASE+1]], +define amdgpu_kernel void @double15_inselt(<15 x double> addrspace(1)* %out, <15 x double> %vec, i32 %sel) { +entry: + %v = insertelement <15 x double> %vec, double 1.000000e+00, i32 %sel + store <15 x double> %v, <15 x double> addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}bit4_inselt: ; GCN: buffer_store_byte ; GCN: buffer_load_ubyte diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll --- 
a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll @@ -246,7 +246,7 @@ ; GFX908-DAG: v_accvgpr_read_b32 ; GCN: NumVgprs: 256 -; GFX900: ScratchSize: 644 +; GFX900: ScratchSize: 2052 ; GFX908-FIXME: ScratchSize: 0 ; GCN: VGPRBlocks: 63 ; GCN: NumVGPRsForWavesPerEU: 256