diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -738,7 +738,7 @@
       return getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
 
     if (Shuffle->changesLength())
-      return -1;
+      return BaseT::getUserCost(U, Operands);
 
     if (Shuffle->isIdentity())
       return 0;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
--- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll
@@ -1,7 +1,7 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN,TPT %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN,TPT %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN,CS %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN,CS %s
 
 ; GCN-LABEL: 'shufflevector_00_v2i16'
 ; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer
@@ -51,3 +51,14 @@
   store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: 'shufflevector_xxx'
+; TPT: Unknown cost for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32>
+; CS: estimated cost of 8 for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32>
+; Should not assert
+define amdgpu_kernel void @shufflevector_xxx(<4 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) {
+  %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr
+  %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32>
+  store <4 x i8> %shuf, <4 x i8> addrspace(1)* %out
+  ret void
+}