Summary of this patch (text before the first "Index:" header is not part of any hunk):

  * LegalizeTypes.cpp: when an integer is split into Lo/Hi halves, the constant
    shift amount of the SRL that produces the high half is now created with the
    type returned by TLI.getScalarShiftAmountTy(DataLayout, Op.getValueType())
    instead of TLI.getPointerTy(DataLayout).
  * X86ISelLowering.h: getScalarShiftAmountTy now inspects the value type and
    returns MVT::i16 for types of 512 bits or more and MVT::i8 otherwise, and
    asserts that half the bit width (rounded up) fits in the returned type.
    llvm/Support/MathExtras.h is added to the includes.
  * insert_vector_elt.v2i16.ll: the three RUN lines gain
    -mtriple=amdgcn---amdgiz on the llc command and drop -enable-var-scope
    from the FileCheck command.

NOTE(review): the line structure below was restored from a whitespace-collapsed
copy. Line breaks agree with the hunk headers; leading indentation of the
context and changed lines is reconstructed and should be confirmed against the
target files before applying.

Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1172,9 +1172,11 @@
   assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
          Op.getValueSizeInBits() && "Invalid integer splitting!");
   Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
-  Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
-                   DAG.getConstant(LoVT.getSizeInBits(), dl,
-                                   TLI.getPointerTy(DAG.getDataLayout())));
+  Hi =
+      DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+                  DAG.getConstant(LoVT.getSizeInBits(), dl,
+                                  TLI.getScalarShiftAmountTy(
+                                      DAG.getDataLayout(), Op.getValueType())));
   Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }
 
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
 
 namespace llvm {
@@ -664,8 +665,14 @@
     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                ArgListTy &Args) const override;
 
-    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
-      return MVT::i8;
+    // For i512, DAGTypeLegalizer::SplitInteger needs a shift amount 256,
+    // which cannot be held by i8, therefore use i16 instead. In all the
+    // other situations i8 is sufficient.
+    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
+      MVT T = VT.getSizeInBits() >= 512 ? MVT::i16 : MVT::i8;
+      assert((VT.getSizeInBits() + 1) / 2 < (1U << T.getSizeInBits()) &&
+             "Scalar shift amount type too small");
+      return T;
     }
 
     const MCExpr *
Index: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
-; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=fiji -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=VI -check-prefix=GFX89 %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=hawaii -enable-amdgpu-aa=0 -mattr=+flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=CIVI -check-prefix=CI %s
 
 ; GCN-LABEL: {{^}}s_insertelement_v2i16_0:
 ; GCN: s_load_dword [[VEC:s[0-9]+]]