# Patch notes: free text placed ahead of the `diff --git` header; not part of any hunk.
#
# Target file: llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
#
# Hunk 1 (@@ -1,5 +1,5 @@): replaces the RUN line so that opt is additionally
#   invoked with -amdgpu-promote-alloca-to-vector-limit=512.
#
# Hunk 2 (@@ -332,3 +332,48 @@): adds two functions after the three context lines.
#   * @test_accessty_too_small stores a <2 x i16> into an alloca of [4 x i64].
#     Its CHECK lines expect the alloca and the store to still be present in
#     the output.
#   * @store_2xi32_into_double allocates [9 x double], stores <2 x i32> /
#     <4 x i32> vectors at elements 0, 4 and 8, and loads doubles back from
#     elements 0, 4, 5 and 8, passing each load to a freeze. Its CHECK lines
#     expect only four freeze instructions (first operand 0x5F0000005E, the
#     other three undef) followed by ret void.
#
# NOTE(review): the line breaks of this patch have been collapsed; presumably
#   they must be restored before patch tooling can consume it. TODO confirm
#   against the upstream commit.
# NOTE(review): the three vector stores in @store_2xi32_into_double carry no
#   value operand in this copy (`store <2 x i32> , ptr ...`); the constants look
#   like they were lost in extraction. 0x5F0000005E splits into the 32-bit
#   halves 0x5F (95) and 0x5E (94), which would be consistent with the first
#   store being `<i32 94, i32 95>` under this little-endian datalayout. Confirm
#   against upstream; the other two constants cannot be inferred from here.
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca -amdgpu-promote-alloca-to-vector-limit=512 < %s | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" @@ -332,3 +332,48 @@ store <3 x i32> %val, ptr addrspace(5) %stack ret void } + +define void @test_accessty_too_small(<2 x i16> %val) { +; CHECK-LABEL: define void @test_accessty_too_small +; CHECK-SAME: (<2 x i16> [[VAL:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5) +; CHECK-NEXT: store <2 x i16> [[VAL]], ptr addrspace(5) [[STACK]], align 4 +; CHECK-NEXT: ret void +; +entry: + %stack = alloca [4 x i64], align 4, addrspace(5) + store <2 x i16> %val, ptr addrspace(5) %stack + ret void +} + +define void @store_2xi32_into_double(double %foo) { +; CHECK-LABEL: define void @store_2xi32_into_double +; CHECK-SAME: (double [[FOO:%.*]]) { +; CHECK-NEXT: [[DUMMYUSER0:%.*]] = freeze double 0x5F0000005E +; CHECK-NEXT: [[DUMMYUSER1:%.*]] = freeze double undef +; CHECK-NEXT: [[DUMMYUSER2:%.*]] = freeze double undef +; CHECK-NEXT: [[DUMMYUSER3:%.*]] = freeze double undef +; CHECK-NEXT: ret void +; + %alloca = alloca [9 x double], align 8, addrspace(5) + + store <2 x i32> , ptr addrspace(5) %alloca, align 8 + %load0 = load double, ptr addrspace(5) %alloca, align 8 + %dummyuser0 = freeze double %load0 + + %idx4 = getelementptr inbounds [9 x double], ptr addrspace(5) 
%alloca, i32 0, i32 4 + %idx5 = getelementptr inbounds [9 x double], ptr addrspace(5) %alloca, i32 0, i32 5 + store <4 x i32> , ptr addrspace(5) %idx4, align 8 + %load1 = load double, ptr addrspace(5) %idx4, align 8 + %dummyuser1 = freeze double %load1 + %load2 = load double, ptr addrspace(5) %idx5, align 8 + %dummyuser2 = freeze double %load2 + + %idx8 = getelementptr inbounds [9 x double], ptr addrspace(5) %alloca, i32 0, i32 8 + store <2 x i32> , ptr addrspace(5) %idx8, align 8 + %load3 = load double, ptr addrspace(5) %idx8, align 8 + %dummyuser3 = freeze double %load3 + + ret void +}