diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -738,7 +738,9 @@
         return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
 
+      // Length-changing shuffles are not costed here: hand them to the base
+      // implementation rather than returning the old -1 sentinel.
       if (Shuffle->changesLength())
-        return -1;
+        return BaseT::getUserCost(U, Operands);
 
       if (Shuffle->isIdentity())
         return 0;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
--- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
@@ -51,3 +51,12 @@
   store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
   ret void
 }
+
+; Regression test: costing a length-changing shufflevector (<2 x i8> -> <4 x i8>) must not assert.
+; NOTE(review): the shuffle mask below was reconstructed (it was missing from the patch text) - confirm.
+define amdgpu_kernel void @shufflevector_xxx(<4 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) {
+  %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr
+  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  store <4 x i8> %shuf, <4 x i8> addrspace(1)* %out
+  ret void
+}