# Patch summary (free-text preamble; everything before the first "diff --git" header is commentary).
#
# Adds one FileCheck regression test to each of two InstCombine test files:
#   * AMDGPU/amdgcn-demanded-vector-elts.ll: @extract_elt0_freeze_buffer_load_v2f32
#       <2 x float> llvm.amdgcn.buffer.load -> freeze -> extractelement lane 0.
#   * vec_demanded_elts.ll: @test_freeze
#       insertelement into lane 0 of <4 x i32> -> freeze -> extractelement lane 0.
#
# In both tests the CHECK lines expect the full-width vector producer, the freeze, and the
# extractelement to all remain in the output; i.e. the expected output shows no narrowing of the
# vector operation through the freeze. The only visible difference between input and expected
# output in the AMDGPU test is the extract index type (i32 0 in the input, i64 0 in the CHECK line).
#
# NOTE(review): presumably these are baseline tests recording current behaviour ahead of a
# follow-up change that looks through freeze -- confirm against the accompanying review.
# NOTE(review): the line breaks of this diff appear to have been collapsed. The hunk headers
# (-51,6 +51,19 and -1159,3 +1159,16) are consistent with 13 added lines per hunk, so do not
# insert or remove diff lines below without recomputing those counts.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -51,6 +51,19 @@ ret float %elt0 } +define amdgpu_ps float @extract_elt0_freeze_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { +; CHECK-LABEL: @extract_elt0_freeze_buffer_load_v2f32( +; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i1 false, i1 false) +; CHECK-NEXT: [[FREEZE:%.*]] = freeze <2 x float> [[DATA]] +; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x float> [[FREEZE]], i64 0 +; CHECK-NEXT: ret float [[ELT0]] +; + %data = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i1 false, i1 false) + %freeze = freeze <2 x float> %data + %elt0 = extractelement <2 x float> %freeze, i32 0 + ret float %elt0 +} + define amdgpu_ps float @extract_elt1_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs) #0 { ; CHECK-LABEL: @extract_elt1_buffer_load_v2f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i1 false, i1 false) diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -1159,3 +1159,16 @@ call void @use(<2 x i4> %b_xshuf_y) ret i4 %b_xy0 } + +define i32 @test_freeze(<4 x i32> %v, i32 %x) { +; CHECK-LABEL: @test_freeze( +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[X:%.*]], i64 0 +; CHECK-NEXT: [[FR:%.*]] = freeze <4 x i32> [[INS]] +; CHECK-NEXT: [[RET:%.*]] = extractelement <4 x i32> [[FR]], i64 0 +; CHECK-NEXT: ret 
i32 [[RET]] +; + %ins = insertelement <4 x i32> %v, i32 %x, i64 0 + %fr = freeze <4 x i32> %ins + %ret = extractelement <4 x i32> %fr, i64 0 + ret i32 %ret +}