Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1934,6 +1934,15 @@
 
   // Load back the required element.
   StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
+
+  // FIXME: This is to handle i1 vectors with elements promoted to i8.
+  // i1 vector handling needs general improvement.
+  if (N->getValueType(0).bitsLT(EltVT)) {
+    SDValue Load = DAG.getLoad(EltVT, dl, Store, StackPtr,
+      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
+    return DAG.getZExtOrTrunc(Load, dl, N->getValueType(0));
+  }
+
   return DAG.getExtLoad(
       ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
       MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
Index: llvm/trunk/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/extract_vector_dynelt.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
+
+; Dynamic extract from a constant <4 x i1>: the checks expect four byte stores,
+; one unsigned byte load, and an AND with 1 to reduce the loaded byte to a bit.
+; GCN-LABEL: {{^}}bit4_extelt:
+; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
+; GCN-DAG: buffer_store_byte [[ZERO]],
+; GCN-DAG: buffer_store_byte [[ONE]],
+; GCN-DAG: buffer_store_byte [[ZERO]],
+; GCN-DAG: buffer_store_byte [[ONE]],
+; GCN: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; GCN: v_and_b32_e32 [[RES:v[0-9]+]], 1, [[LOAD]]
+; GCN: flat_store_dword v[{{[0-9:]+}}], [[RES]]
+define amdgpu_kernel void @bit4_extelt(i32 addrspace(1)* %out, i32 %sel) {
+entry:
+  %ext = extractelement <4 x i1> <i1 0, i1 1, i1 0, i1 1>, i32 %sel
+  %zext = zext i1 %ext to i32
+  store i32 %zext, i32 addrspace(1)* %out
+  ret void
+}
+
+; Only the function label is checked for the <128 x i1> case; no instruction
+; patterns are matched. NOTE(review): the constant operand is assumed to
+; alternate 0/1 like bit4_extelt above -- confirm against the committed test.
+; GCN-LABEL: {{^}}bit128_extelt:
+define amdgpu_kernel void @bit128_extelt(i32 addrspace(1)* %out, i32 %sel) {
+entry:
+  %ext = extractelement <128 x i1> <
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1,
+      i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, i32 %sel
+  %zext = zext i1 %ext to i32
+  store i32 %zext, i32 addrspace(1)* %out
+  ret void
+}