diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5733,11 +5733,11 @@
   // Convert vector index to bit-index and get the required bit mask.
   assert(isPowerOf2_32(EltSize));
+  const auto EltMask = maskTrailingOnes<uint64_t>(EltSize);
   SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
   SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
 
   SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT,
-                            DAG.getConstant(0xffff, SL, IntVT),
-                            ScaledIdx);
+                            DAG.getConstant(EltMask, SL, IntVT), ScaledIdx);
 
   // 1. Create a congruent vector with the target value in each element.
   SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -447,7 +447,7 @@
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_load_dword s6, s[0:1], 0x34
 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b64 s[4:5], 0xffff
+; GCN-NEXT: s_mov_b64 s[4:5], 0xff
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: s_lshl_b32 s6, s6, 3
 ; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s6
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -1033,7 +1033,7 @@
 ; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_lshl_b32 s5, s6, 3
-; SI-NEXT: s_lshl_b32 s5, -1, s5
+; SI-NEXT: s_lshl_b32 s5, 0xff, s5
 ; SI-NEXT: s_andn2_b32 s4, s4, s5
 ; SI-NEXT: s_and_b32 s5, s5, 0x505
 ; SI-NEXT: s_or_b32 s4, s5, s4
@@ -1046,14 +1046,15 @@
 ; VI-NEXT: s_load_dword s6, s[4:5], 0x4c
 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
 ; VI-NEXT: s_load_dword s4, s[4:5], 0x28
+; VI-NEXT: v_mov_b32_e32 v0, 0xff
 ; VI-NEXT: s_mov_b32 s3, 0x1100f000
-; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: s_lshl_b32 s5, s6, 3
-; VI-NEXT: v_lshlrev_b16_e64 v0, s5, -1
+; VI-NEXT: v_lshlrev_b16_e32 v0, s5, v0
 ; VI-NEXT: v_not_b32_e32 v1, v0
 ; VI-NEXT: v_and_b32_e32 v1, s4, v1
 ; VI-NEXT: v_and_b32_e32 v0, 0x505, v0
+; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
@@ -1074,7 +1075,7 @@
 ; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_lshl_b32 s5, s6, 3
-; SI-NEXT: s_lshl_b32 s5, 0xffff, s5
+; SI-NEXT: s_lshl_b32 s5, 0xff, s5
 ; SI-NEXT: s_andn2_b32 s4, s4, s5
 ; SI-NEXT: s_and_b32 s5, s5, 0x5050505
 ; SI-NEXT: s_or_b32 s4, s5, s4
@@ -1094,7 +1095,7 @@
 ; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: s_lshl_b32 s5, s6, 3
-; VI-NEXT: s_lshl_b32 s5, 0xffff, s5
+; VI-NEXT: s_lshl_b32 s5, 0xff, s5
 ; VI-NEXT: s_andn2_b32 s4, s4, s5
 ; VI-NEXT: s_and_b32 s5, s5, 0x5050505
 ; VI-NEXT: s_or_b32 s4, s5, s4
@@ -1119,7 +1120,7 @@
 ; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_lshl_b32 s5, s6, 3
-; SI-NEXT: s_lshl_b32 s5, 0xffff, s5
+; SI-NEXT: s_lshl_b32 s5, 0xff, s5
 ; SI-NEXT: s_andn2_b32 s4, s4, s5
 ; SI-NEXT: s_and_b32 s5, s5, 0x5050505
 ; SI-NEXT: s_or_b32 s4, s5, s4
@@ -1136,7 +1137,7 @@
 ; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: s_lshl_b32 s5, s6, 3
-; VI-NEXT: s_lshl_b32 s5, 0xffff, s5
+; VI-NEXT: s_lshl_b32 s5, 0xff, s5
 ; VI-NEXT: s_andn2_b32 s4, s4, s5
 ; VI-NEXT: s_and_b32 s5, s5, 0x5050505
 ; VI-NEXT: s_or_b32 s4, s5, s4
@@ -1160,7 +1161,7 @@
 ; SI-NEXT: s_mov_b32 s5, s1
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
 ; SI-NEXT: s_lshl_b32 s8, s8, 3
-; SI-NEXT: s_mov_b64 s[2:3], 0xffff
+; SI-NEXT: s_mov_b64 s[2:3], 0xff
 ; SI-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
 ; SI-NEXT: s_and_b32 s9, s3, 0x5050505
 ; SI-NEXT: s_and_b32 s8, s2, 0x5050505
@@ -1183,7 +1184,7 @@
 ; VI-NEXT: s_mov_b32 s5, s1
 ; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
 ; VI-NEXT: s_lshl_b32 s8, s8, 3
-; VI-NEXT: s_mov_b64 s[2:3], 0xffff
+; VI-NEXT: s_mov_b64 s[2:3], 0xff
 ; VI-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
 ; VI-NEXT: s_and_b32 s9, s3, 0x5050505
 ; VI-NEXT: s_and_b32 s8, s2, 0x5050505
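
Note (not part of the patch): a minimal standalone C++ sketch of the mask logic the SIISelLowering.cpp change relies on. The helper maskTrailingOnes64 below is a hypothetical local stand-in for llvm::maskTrailingOnes<uint64_t>; the idea is that the bit-field mask must cover exactly EltSize bits, so byte elements get 0xff rather than the previously hard-coded 0xffff, and the BFM value is that mask shifted to the selected element's bit position.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::maskTrailingOnes<uint64_t>: a value with the
// N low bits set to one.
static uint64_t maskTrailingOnes64(unsigned N) {
  assert(N <= 64 && "mask width out of range");
  return N == 64 ? ~0ULL : (1ULL << N) - 1;
}

int main() {
  // For <N x i8> elements EltSize is 8 -> mask 0xff (what the patch now emits).
  std::printf("EltSize  8 -> 0x%llx\n",
              (unsigned long long)maskTrailingOnes64(8));
  // For <N x i16> elements EltSize is 16 -> mask 0xffff (the old constant).
  std::printf("EltSize 16 -> 0x%llx\n",
              (unsigned long long)maskTrailingOnes64(16));

  // BFM = EltMask << (Idx * EltSize): the shifted bit-field mask that the
  // lowered code builds with ISD::SHL before blending in the new element.
  unsigned Idx = 2, EltSize = 8;
  unsigned long long BFM =
      (unsigned long long)maskTrailingOnes64(EltSize) << (Idx * EltSize);
  std::printf("BFM for element %u -> 0x%llx\n", Idx, BFM); // prints 0xff0000
  return 0;
}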