# Patch summary: stop the DAG combiner from narrowing volatile vector loads.
#
# NOTE(review): the line breaks and indentation in this patch were
# reconstructed from a whitespace-collapsed copy; confirm them against the
# original patch before applying.
#
# File 1 of 2 (DAGCombiner.cpp):
# ReplaceExtractVectorEltOfLoadWithNarrowedLoad now returns an empty SDValue
# as its first statement when OriginalLoad->isVolatile() is true, i.e. before
# ResultVT, VecEltVT or Align are computed.
# NOTE(review): presumably an empty SDValue tells the caller that no
# replacement was made, and the motivation is that a narrowed load would
# change the width of the volatile access -- confirm against the callers.
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12265,6 +12265,9 @@
 
 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+  if (OriginalLoad->isVolatile())
+    return SDValue();
+
   EVT ResultVT = EVE->getValueType(0);
   EVT VecEltVT = InVecVT.getVectorElementType();
   unsigned Align = OriginalLoad->getAlignment();
# File 2 of 2 (extractelt-to-trunc.ll):
# Adds three functions that each do a volatile load of <4 x i32> from
# addrspace(1), extract one element (constant index 0, constant index 2, and
# a run-time index %idx respectively) and store it to %out. The GCN check
# lines for each function expect a buffer_load_dwordx4 followed by a
# buffer_store_dword.
Index: test/CodeGen/AMDGPU/extractelt-to-trunc.ll
===================================================================
--- test/CodeGen/AMDGPU/extractelt-to-trunc.ll
+++ test/CodeGen/AMDGPU/extractelt-to-trunc.ll
@@ -41,3 +41,37 @@
   store float %extract, float addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: {{^}}no_extract_volatile_load_extract0:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dword v
+define void @no_extract_volatile_load_extract0(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+  %elt0 = extractelement <4 x i32> %vec, i32 0
+  store i32 %elt0, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}no_extract_volatile_load_extract2:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dword v
+
+define void @no_extract_volatile_load_extract2(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+entry:
+  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+  %elt2 = extractelement <4 x i32> %vec, i32 2
+  store i32 %elt2, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}no_extract_volatile_load_dynextract:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dword v
+define void @no_extract_volatile_load_dynextract(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
+entry:
+  %vec = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
+  %eltN = extractelement <4 x i32> %vec, i32 %idx
+  store i32 %eltN, i32 addrspace(1)* %out
+  ret void
+}