Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -3879,6 +3879,11 @@ if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue())) return Elt; + // An undef extract index can be arbitrarily chosen to be an out-of-range + // index value, which would result in the instruction being undef. + if (isa<UndefValue>(Idx)) + return UndefValue::get(Vec->getType()->getVectorElementType()); + return nullptr; } Index: test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll @@ -17,3 +17,29 @@ ret void } +; Make sure we don't hit use of undefined register errors when expanding an +; extract with undef index. + +; CHECK-LABEL: {{^}}extract_adjacent_blocks: +define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 { +bb: + %tmp = icmp eq i32 %arg, 0 + br i1 %tmp, label %bb1, label %bb4 + +bb1: + %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef + %tmp3 = extractelement <4 x float> %tmp2, i32 undef + call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out + br label %bb7 + +bb4: + %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef + %tmp6 = extractelement <4 x float> %tmp5, i32 undef + call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out + br label %bb7 + +bb7: + %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ] + store volatile float %tmp8, float addrspace(1)* undef + ret void +} Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -469,55 +469,6 @@ ret void } -; GCN-LABEL: 
{{^}}extract_adjacent_blocks: -; GCN: s_load_dword [[ARG:s[0-9]+]] -; GCN: s_cmp_lg_u32 -; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]] - -; GCN: buffer_load_dwordx4 -; MOVREL: s_mov_b32 m0, -; MOVREL: v_movrels_b32_e32 - -; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0 -; IDXMODE: v_mov_b32_e32 -; IDXMODE: s_set_gpr_idx_off - -; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]] - -; GCN: [[BB4]]: -; GCN: buffer_load_dwordx4 -; MOVREL: s_mov_b32 m0, -; MOVREL: v_movrels_b32_e32 - -; IDXMODE: s_set_gpr_idx_on -; IDXMODE: v_mov_b32_e32 -; IDXMODE: s_set_gpr_idx_off - -; GCN: [[ENDBB]]: -; GCN: buffer_store_dword -; GCN: s_endpgm -define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 { -bb: - %tmp = icmp eq i32 %arg, 0 - br i1 %tmp, label %bb1, label %bb4 - -bb1: - %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef - %tmp3 = extractelement <4 x float> %tmp2, i32 undef - call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out - br label %bb7 - -bb4: - %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef - %tmp6 = extractelement <4 x float> %tmp5, i32 undef - call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out - br label %bb7 - -bb7: - %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ] - store volatile float %tmp8, float addrspace(1)* undef - ret void -} ; GCN-LABEL: {{^}}insert_adjacent_blocks: ; GCN: s_load_dword [[ARG:s[0-9]+]] Index: test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll =================================================================== --- test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll +++ test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll @@ -46,3 +46,10 @@ ; CHECK-NEXT: %[[extract:.*]] = extractelement <8 x i8> %[[add]], i32 6 ; CHECK-NEXT: ret i8 %[[extract]] } + +define i32 @test5(<4 x i32> %V) { + %extract = extractelement <4 x i32> %V, i32 undef + ret i32 %extract +} +; CHECK-LABEL: @test5( +; CHECK: 
ret i32 undef