Index: llvm/test/Transforms/VectorCombine/AArch64/load-insert-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/VectorCombine/AArch64/load-insert-store.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=aarch64 -passes=vector-combine -S %s | FileCheck %s
+
+;; Load + insertelement chain + store back to the same pointer. Whether these tests can be optimized depends on AArch64's TTI.
+
+define void @load_insert_x2_store(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define void @load_insert_x2_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    store <16 x i16> [[T2]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  store <16 x i16> %t2, ptr %A
+  ret void
+}
+
+define <16 x i16> @load_insert_x2_store_multiple_uses(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define <16 x i16> @load_insert_x2_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    store <16 x i16> [[T2]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <16 x i16> [[T2]]
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  store <16 x i16> %t2, ptr %A
+  ret <16 x i16> %t2
+}
+
+define void @load_insert_x3_store(ptr %A, i16 %B, i16 %C, i16 %D) {
+; CHECK-LABEL: define void @load_insert_x3_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]], i16 [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <16 x i16> [[T2]], i16 [[D]], i32 8
+; CHECK-NEXT:    store <16 x i16> [[T3]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  %t3 = insertelement <16 x i16> %t2, i16 %D, i32 8
+  store <16 x i16> %t3, ptr %A
+  ret void
+}
+
+define <8 x float> @load_insert_x3_store_multiple_uses(ptr %A, float %B, float %C, float %D) {
+; CHECK-LABEL: define <8 x float> @load_insert_x3_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <8 x float> [[T0]], float [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x float> [[T1]], float [[C]], i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x float> [[T2]], float [[D]], i32 7
+; CHECK-NEXT:    store <8 x float> [[T3]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <8 x float> [[T3]]
+;
+entry:
+  %t0 = load <8 x float>, ptr %A
+  %t1 = insertelement <8 x float> %t0, float %B, i32 4
+  %t2 = insertelement <8 x float> %t1, float %C, i32 6
+  %t3 = insertelement <8 x float> %t2, float %D, i32 7
+  store <8 x float> %t3, ptr %A
+  ret <8 x float> %t3
+}
+
+define void @load_insert_x4_store(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define void @load_insert_x4_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[B]], i32 5
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <16 x i16> [[T2]], i16 [[C]], i32 6
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <16 x i16> [[T3]], i16 [[C]], i32 7
+; CHECK-NEXT:    store <16 x i16> [[T4]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %B, i32 5
+  %t3 = insertelement <16 x i16> %t2, i16 %C, i32 6
+  %t4 = insertelement <16 x i16> %t3, i16 %C, i32 7
+  store <16 x i16> %t4, ptr %A
+  ret void
+}
+
+define <8 x float> @load_insert_x4_store_multiple_uses(ptr %A, float %B, float %C) {
+; CHECK-LABEL: define <8 x float> @load_insert_x4_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <8 x float> [[T0]], float [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x float> [[T1]], float [[B]], i32 5
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x float> [[T2]], float [[C]], i32 6
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <8 x float> [[T3]], float [[C]], i32 7
+; CHECK-NEXT:    store <8 x float> [[T4]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <8 x float> [[T4]]
+;
+entry:
+  %t0 = load <8 x float>, ptr %A
+  %t1 = insertelement <8 x float> %t0, float %B, i32 4
+  %t2 = insertelement <8 x float> %t1, float %B, i32 5
+  %t3 = insertelement <8 x float> %t2, float %C, i32 6
+  %t4 = insertelement <8 x float> %t3, float %C, i32 7
+  store <8 x float> %t4, ptr %A
+  ret <8 x float> %t4
+}
Index: llvm/test/Transforms/VectorCombine/RISCV/load-insert-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/VectorCombine/RISCV/load-insert-store.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=riscv64 -mattr=+v -passes=vector-combine -S %s | FileCheck %s
+
+;; Load + insertelement chain + store back to the same pointer. Whether these tests can be optimized depends on RISCV's TTI.
+
+define void @load_insert_x2_store(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define void @load_insert_x2_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    store <16 x i16> [[T2]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  store <16 x i16> %t2, ptr %A
+  ret void
+}
+
+define <16 x i16> @load_insert_x2_store_multiple_uses(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define <16 x i16> @load_insert_x2_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    store <16 x i16> [[T2]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <16 x i16> [[T2]]
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  store <16 x i16> %t2, ptr %A
+  ret <16 x i16> %t2
+}
+
+define void @load_insert_x3_store(ptr %A, i16 %B, i16 %C, i16 %D) {
+; CHECK-LABEL: define void @load_insert_x3_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]], i16 [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <16 x i16> [[T2]], i16 [[D]], i32 8
+; CHECK-NEXT:    store <16 x i16> [[T3]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  %t3 = insertelement <16 x i16> %t2, i16 %D, i32 8
+  store <16 x i16> %t3, ptr %A
+  ret void
+}
+
+define <8 x float> @load_insert_x3_store_multiple_uses(ptr %A, float %B, float %C, float %D) {
+; CHECK-LABEL: define <8 x float> @load_insert_x3_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <8 x float> [[T0]], float [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x float> [[T1]], float [[C]], i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x float> [[T2]], float [[D]], i32 7
+; CHECK-NEXT:    store <8 x float> [[T3]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <8 x float> [[T3]]
+;
+entry:
+  %t0 = load <8 x float>, ptr %A
+  %t1 = insertelement <8 x float> %t0, float %B, i32 4
+  %t2 = insertelement <8 x float> %t1, float %C, i32 6
+  %t3 = insertelement <8 x float> %t2, float %D, i32 7
+  store <8 x float> %t3, ptr %A
+  ret <8 x float> %t3
+}
+
+define void @load_insert_x4_store(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define void @load_insert_x4_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[B]], i32 5
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <16 x i16> [[T2]], i16 [[C]], i32 6
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <16 x i16> [[T3]], i16 [[C]], i32 7
+; CHECK-NEXT:    store <16 x i16> [[T4]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %B, i32 5
+  %t3 = insertelement <16 x i16> %t2, i16 %C, i32 6
+  %t4 = insertelement <16 x i16> %t3, i16 %C, i32 7
+  store <16 x i16> %t4, ptr %A
+  ret void
+}
+
+define <8 x float> @load_insert_x4_store_multiple_uses(ptr %A, float %B, float %C) {
+; CHECK-LABEL: define <8 x float> @load_insert_x4_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <8 x float> [[T0]], float [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x float> [[T1]], float [[B]], i32 5
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x float> [[T2]], float [[C]], i32 6
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <8 x float> [[T3]], float [[C]], i32 7
+; CHECK-NEXT:    store <8 x float> [[T4]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <8 x float> [[T4]]
+;
+entry:
+  %t0 = load <8 x float>, ptr %A
+  %t1 = insertelement <8 x float> %t0, float %B, i32 4
+  %t2 = insertelement <8 x float> %t1, float %B, i32 5
+  %t3 = insertelement <8 x float> %t2, float %C, i32 6
+  %t4 = insertelement <8 x float> %t3, float %C, i32 7
+  store <8 x float> %t4, ptr %A
+  ret <8 x float> %t4
+}
Index: llvm/test/Transforms/VectorCombine/X86/load-insert-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/VectorCombine/X86/load-insert-store.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=x86_64 -mattr=+avx2 -passes=vector-combine -S %s | FileCheck %s
+
+;; Load + insertelement chain + store back to the same pointer. Whether these tests can be optimized depends on x86_64's TTI.
+
+define void @load_insert_x2_store(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define void @load_insert_x2_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    store <16 x i16> [[T2]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  store <16 x i16> %t2, ptr %A
+  ret void
+}
+
+define <16 x i16> @load_insert_x2_store_multiple_uses(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define <16 x i16> @load_insert_x2_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    store <16 x i16> [[T2]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <16 x i16> [[T2]]
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  store <16 x i16> %t2, ptr %A
+  ret <16 x i16> %t2
+}
+
+define void @load_insert_x3_store(ptr %A, i16 %B, i16 %C, i16 %D) {
+; CHECK-LABEL: define void @load_insert_x3_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]], i16 [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[C]], i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <16 x i16> [[T2]], i16 [[D]], i32 8
+; CHECK-NEXT:    store <16 x i16> [[T3]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %C, i32 6
+  %t3 = insertelement <16 x i16> %t2, i16 %D, i32 8
+  store <16 x i16> %t3, ptr %A
+  ret void
+}
+
+define <8 x float> @load_insert_x3_store_multiple_uses(ptr %A, float %B, float %C, float %D) {
+; CHECK-LABEL: define <8 x float> @load_insert_x3_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], float [[B:%.*]], float [[C:%.*]], float [[D:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <8 x float> [[T0]], float [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x float> [[T1]], float [[C]], i32 6
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x float> [[T2]], float [[D]], i32 7
+; CHECK-NEXT:    store <8 x float> [[T3]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <8 x float> [[T3]]
+;
+entry:
+  %t0 = load <8 x float>, ptr %A
+  %t1 = insertelement <8 x float> %t0, float %B, i32 4
+  %t2 = insertelement <8 x float> %t1, float %C, i32 6
+  %t3 = insertelement <8 x float> %t2, float %D, i32 7
+  store <8 x float> %t3, ptr %A
+  ret <8 x float> %t3
+}
+
+define void @load_insert_x4_store(ptr %A, i16 %B, i16 %C) {
+; CHECK-LABEL: define void @load_insert_x4_store
+; CHECK-SAME: (ptr [[A:%.*]], i16 [[B:%.*]], i16 [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <16 x i16>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <16 x i16> [[T0]], i16 [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <16 x i16> [[T1]], i16 [[B]], i32 5
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <16 x i16> [[T2]], i16 [[C]], i32 6
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <16 x i16> [[T3]], i16 [[C]], i32 7
+; CHECK-NEXT:    store <16 x i16> [[T4]], ptr [[A]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load <16 x i16>, ptr %A
+  %t1 = insertelement <16 x i16> %t0, i16 %B, i32 4
+  %t2 = insertelement <16 x i16> %t1, i16 %B, i32 5
+  %t3 = insertelement <16 x i16> %t2, i16 %C, i32 6
+  %t4 = insertelement <16 x i16> %t3, i16 %C, i32 7
+  store <16 x i16> %t4, ptr %A
+  ret void
+}
+
+define <8 x float> @load_insert_x4_store_multiple_uses(ptr %A, float %B, float %C) {
+; CHECK-LABEL: define <8 x float> @load_insert_x4_store_multiple_uses
+; CHECK-SAME: (ptr [[A:%.*]], float [[B:%.*]], float [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load <8 x float>, ptr [[A]], align 32
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <8 x float> [[T0]], float [[B]], i32 4
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <8 x float> [[T1]], float [[B]], i32 5
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <8 x float> [[T2]], float [[C]], i32 6
+; CHECK-NEXT:    [[T4:%.*]] = insertelement <8 x float> [[T3]], float [[C]], i32 7
+; CHECK-NEXT:    store <8 x float> [[T4]], ptr [[A]], align 32
+; CHECK-NEXT:    ret <8 x float> [[T4]]
+;
+entry:
+  %t0 = load <8 x float>, ptr %A
+  %t1 = insertelement <8 x float> %t0, float %B, i32 4
+  %t2 = insertelement <8 x float> %t1, float %B, i32 5
+  %t3 = insertelement <8 x float> %t2, float %C, i32 6
+  %t4 = insertelement <8 x float> %t3, float %C, i32 7
+  store <8 x float> %t4, ptr %A
+  ret <8 x float> %t4
+}
Index: llvm/test/Transforms/VectorCombine/load-insert-store.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -16,6 +16,23 @@
   ret void
 }
 
+; Verify that the store is still scalarized when the inserted vector has another use (it is also returned).
+define <16 x i8> @insert_store_multiple_uses(ptr %q, i8 zeroext %s) {
+; CHECK-LABEL: @insert_store_multiple_uses(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[Q:%.*]], align 16
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[S:%.*]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i8>, ptr [[Q]], i32 0, i32 3
+; CHECK-NEXT:    store i8 [[S]], ptr [[TMP1]], align 1
+; CHECK-NEXT:    ret <16 x i8> [[VECINS]]
+;
+entry:
+  %0 = load <16 x i8>, ptr %q
+  %vecins = insertelement <16 x i8> %0, i8 %s, i32 3
+  store <16 x i8> %vecins, ptr %q, align 16
+  ret <16 x i8> %vecins
+}
+
 define void @insert_store_i16_align1(ptr %q, i16 zeroext %s) {
 ; CHECK-LABEL: @insert_store_i16_align1(
 ; CHECK-NEXT:  entry: