diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -427,7 +427,7 @@ } if (Triple.isOSAIX() || Triple.isOSLinux()) - DataLayout += "-v256:256:256-v512:512:512"; + DataLayout += "-S128-v256:256:256-v512:512:512"; resetDataLayout(DataLayout); // PPC64 supports atomics up to 8 bytes. diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -140,27 +140,27 @@ // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64-LINUX -// PPC64-LINUX: target datalayout = "E-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +// PPC64-LINUX: target datalayout = "E-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu future %s | \ // RUN: FileCheck %s -check-prefix=PPC64-FUTURE -// PPC64-FUTURE: target datalayout = "E-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +// PPC64-FUTURE: target datalayout = "E-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu pwr10 %s | \ // RUN: FileCheck %s -check-prefix=PPC64-P10 -// PPC64-P10: target datalayout = "E-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +// PPC64-P10: target datalayout = "E-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-LINUX -// PPC64LE-LINUX: target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +// PPC64LE-LINUX: target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu future %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-FUTURE -// PPC64LE-FUTURE: target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +// PPC64LE-FUTURE: 
target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu pwr10 %s | \ // RUN: FileCheck %s -check-prefix=PPC64LE-P10 -// PPC64LE-P10: target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +// PPC64LE-P10: target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" // RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=NVPTX diff --git a/lld/test/ELF/common-archive-lookup.s b/lld/test/ELF/common-archive-lookup.s --- a/lld/test/ELF/common-archive-lookup.s +++ b/lld/test/ELF/common-archive-lookup.s @@ -162,13 +162,13 @@ #--- blockdata.ll -target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64le-unknown-linux-gnu" @block = dso_local local_unnamed_addr global [5 x i32] [i32 5, i32 0, i32 0, i32 0, i32 0], align 4 #--- commonblock.ll -target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" +target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" target triple = "powerpc64le-unknown-linux-gnu" @block = common dso_local local_unnamed_addr global [5 x i32] zeroinitializer, align 4 diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -157,9 +157,8 @@ // Specify the vector alignment explicitly. For v256i1 and v512i1, the // calculated alignment would be 256*alignment(i1) and 512*alignment(i1), // which is 256 and 512 bytes - way over aligned. 
- if ((T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppc64) && - (T.isOSAIX() || T.isOSLinux())) - Ret += "-v256:256:256-v512:512:512"; + if (is64Bit && (T.isOSAIX() || T.isOSLinux())) + Ret += "-S128-v256:256:256-v512:512:512"; return Ret; } diff --git a/llvm/test/CodeGen/PowerPC/32byte-aligned-vector-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/32byte-aligned-vector-stack-alignment.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/32byte-aligned-vector-stack-alignment.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE + +; This tests for 32-byte stack allocated vectors explicitly aligned to 32 bytes +; with __attribute__((aligned(32))). 
+ +define dso_local signext i32 @test_vector_aligned_32bytes() local_unnamed_addr nounwind { +; CHECK-LE-LABEL: test_vector_aligned_32bytes: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mflr r0 +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: std r0, 16(r1) +; CHECK-LE-NEXT: clrldi r0, r1, 59 +; CHECK-LE-NEXT: subfic r0, r0, -64 +; CHECK-LE-NEXT: stdux r1, r1, r0 +; CHECK-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-NEXT: lvx v2, 0, r3 +; CHECK-LE-NEXT: addi r3, r1, 32 +; CHECK-LE-NEXT: stvx v2, 0, r3 +; CHECK-LE-NEXT: bl add +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: lwa r3, 32(r1) +; CHECK-LE-NEXT: mr r1, r30 +; CHECK-LE-NEXT: ld r0, 16(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: mtlr r0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_vector_aligned_32bytes: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: clrldi r0, r1, 59 +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: subfic r0, r0, -160 +; CHECK-BE-NEXT: stdux r1, r1, r0 +; CHECK-BE-NEXT: lis r3, -16384 +; CHECK-BE-NEXT: lis r4, -32768 +; CHECK-BE-NEXT: ori r3, r3, 1 +; CHECK-BE-NEXT: ori r4, r4, 1 +; CHECK-BE-NEXT: rldic r3, r3, 2, 30 +; CHECK-BE-NEXT: rldic r4, r4, 1, 31 +; CHECK-BE-NEXT: std r3, 136(r1) +; CHECK-BE-NEXT: addi r3, r1, 128 +; CHECK-BE-NEXT: std r4, 128(r1) +; CHECK-BE-NEXT: bl add +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lwa r3, 128(r1) +; CHECK-BE-NEXT: mr r1, r30 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr +entry: + %a = alloca <4 x i32>, align 32 + %0 = bitcast <4 x i32>* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) + store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 32 + %call = call signext i32 @add(<4 x i32>* nonnull %a) + %1 = load <4 x i32>, <4 x i32>* %a, align 32 + %vecext = extractelement <4 x i32> %1, i32 0
+ call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) + ret i32 %vecext +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) nounwind + +declare signext i32 @add(<4 x i32>*) local_unnamed_addr nounwind + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) nounwind diff --git a/llvm/test/CodeGen/PowerPC/32byte-array-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/32byte-array-stack-alignment.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/32byte-array-stack-alignment.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE + +; This tests for 32-byte stack allocated arrays. 
+ +@__const.test_stack_array.Arr = private unnamed_addr constant [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8], align 4 + +define dso_local signext i32 @test_stack_array() local_unnamed_addr nounwind { +; CHECK-LE-LABEL: test_stack_array: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mflr r0 +; CHECK-LE-NEXT: std r0, 16(r1) +; CHECK-LE-NEXT: stdu r1, -64(r1) +; CHECK-LE-NEXT: addis r3, r2, .L__const.test_stack_array.Arr@toc@ha +; CHECK-LE-NEXT: li r4, 16 +; CHECK-LE-NEXT: addi r3, r3, .L__const.test_stack_array.Arr@toc@l +; CHECK-LE-NEXT: lxvd2x vs0, r3, r4 +; CHECK-LE-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-NEXT: addi r3, r1, 32 +; CHECK-LE-NEXT: stxvd2x vs0, r3, r4 +; CHECK-LE-NEXT: stxvd2x vs1, 0, r3 +; CHECK-LE-NEXT: bl add +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: lwa r3, 32(r1) +; CHECK-LE-NEXT: addi r1, r1, 64 +; CHECK-LE-NEXT: ld r0, 16(r1) +; CHECK-LE-NEXT: mtlr r0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_stack_array: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: stdu r1, -144(r1) +; CHECK-BE-NEXT: addis r3, r2, .L__const.test_stack_array.Arr@toc@ha +; CHECK-BE-NEXT: addi r4, r3, .L__const.test_stack_array.Arr@toc@l +; CHECK-BE-NEXT: ld r3, .L__const.test_stack_array.Arr@toc@l(r3) +; CHECK-BE-NEXT: ld r5, 24(r4) +; CHECK-BE-NEXT: ld r6, 16(r4) +; CHECK-BE-NEXT: ld r4, 8(r4) +; CHECK-BE-NEXT: std r3, 112(r1) +; CHECK-BE-NEXT: addi r3, r1, 112 +; CHECK-BE-NEXT: std r5, 136(r1) +; CHECK-BE-NEXT: std r6, 128(r1) +; CHECK-BE-NEXT: std r4, 120(r1) +; CHECK-BE-NEXT: bl add +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lwa r3, 112(r1) +; CHECK-BE-NEXT: addi r1, r1, 144 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr +entry: + %Arr = alloca [8 x i32], align 4 + %0 = bitcast [8 x i32]* %Arr to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(32) %0, i8* nonnull 
align 4 dereferenceable(32) bitcast ([8 x i32]* @__const.test_stack_array.Arr to i8*), i64 32, i1 false) + %arraydecay = getelementptr inbounds [8 x i32], [8 x i32]* %Arr, i64 0, i64 0 + %call = call signext i32 @add(i32* nonnull %arraydecay) + %1 = load i32, i32* %arraydecay, align 4 + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) + ret i32 %1 +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) nounwind + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) nounwind + +declare signext i32 @add(i32*) local_unnamed_addr nounwind + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) nounwind diff --git a/llvm/test/CodeGen/PowerPC/32byte-vector-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/32byte-vector-stack-alignment.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/32byte-vector-stack-alignment.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE + +; This tests for 32-byte stack allocated vectors allocated with +; __attribute__ ((vector_size (32))). 
+ +define dso_local signext i32 @test_32byte_vector() local_unnamed_addr nounwind { +; CHECK-LE-LABEL: test_32byte_vector: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mflr r0 +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: std r0, 16(r1) +; CHECK-LE-NEXT: clrldi r0, r1, 59 +; CHECK-LE-NEXT: subfic r0, r0, -96 +; CHECK-LE-NEXT: stdux r1, r1, r0 +; CHECK-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-NEXT: addis r4, r2, .LCPI0_1@toc@ha +; CHECK-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-NEXT: addi r4, r4, .LCPI0_1@toc@l +; CHECK-LE-NEXT: lvx v2, 0, r3 +; CHECK-LE-NEXT: lvx v3, 0, r4 +; CHECK-LE-NEXT: addi r4, r1, 48 +; CHECK-LE-NEXT: addi r3, r1, 32 +; CHECK-LE-NEXT: stvx v2, 0, r4 +; CHECK-LE-NEXT: stvx v3, 0, r3 +; CHECK-LE-NEXT: bl add +; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: lwa r3, 32(r1) +; CHECK-LE-NEXT: mr r1, r30 +; CHECK-LE-NEXT: ld r0, 16(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: mtlr r0 +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: test_32byte_vector: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: clrldi r0, r1, 59 +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: subfic r0, r0, -192 +; CHECK-BE-NEXT: stdux r1, r1, r0 +; CHECK-BE-NEXT: lis r3, -8192 +; CHECK-BE-NEXT: li r4, 5 +; CHECK-BE-NEXT: lis r5, -16384 +; CHECK-BE-NEXT: lis r6, -32768 +; CHECK-BE-NEXT: ori r3, r3, 1 +; CHECK-BE-NEXT: rldic r4, r4, 32, 29 +; CHECK-BE-NEXT: ori r5, r5, 1 +; CHECK-BE-NEXT: ori r6, r6, 1 +; CHECK-BE-NEXT: rldic r3, r3, 3, 29 +; CHECK-BE-NEXT: ori r4, r4, 6 +; CHECK-BE-NEXT: rldic r5, r5, 2, 30 +; CHECK-BE-NEXT: rldic r6, r6, 1, 31 +; CHECK-BE-NEXT: std r3, 152(r1) +; CHECK-BE-NEXT: addi r3, r1, 128 +; CHECK-BE-NEXT: std r4, 144(r1) +; CHECK-BE-NEXT: std r5, 136(r1) +; CHECK-BE-NEXT: std r6, 128(r1) +; CHECK-BE-NEXT: bl add +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lwa r3, 128(r1) +; CHECK-BE-NEXT: mr r1, r30 +; 
CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr +entry: + %a = alloca <8 x i32>, align 32 + %0 = bitcast <8 x i32>* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %0) + store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32>* %a, align 32 + %call = call signext i32 @add(<8 x i32>* nonnull %a) + %1 = load <8 x i32>, <8 x i32>* %a, align 32 + %vecext = extractelement <8 x i32> %1, i32 0 + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %0) + ret i32 %vecext +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) nounwind + +declare signext i32 @add(<8 x i32>*) local_unnamed_addr nounwind + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) nounwind diff --git a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll @@ -0,0 +1,316 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE + +@.str = private unnamed_addr constant [7 x i8] c"%d %d\0A\00", align 1 + +define dso_local signext i32 @test() local_unnamed_addr nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r30, -16(r1) +; CHECK-NEXT: mr r30, r1 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: clrldi r0, r1, 59 +; CHECK-NEXT: subfic r0, r0, -448 +; CHECK-NEXT: stdux r1, r1, r0 +; CHECK-NEXT: addi r3, r1, 96 +; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: li r5, 200 +; CHECK-NEXT: bl memset@notoc +; CHECK-NEXT: plxv v2, .LCPI0_1@PCREL(0), 1 +; CHECK-NEXT: plxv v5,
.LCPI0_0@PCREL(0), 1 +; CHECK-NEXT: pli r3, 1684234849 +; CHECK-NEXT: plxv v1, .LCPI0_3@PCREL(0), 1 +; CHECK-NEXT: plxv v0, .LCPI0_2@PCREL(0), 1 +; CHECK-NEXT: plxv v7, .LCPI0_5@PCREL(0), 1 +; CHECK-NEXT: plxv v6, .LCPI0_4@PCREL(0), 1 +; CHECK-NEXT: stw r3, 416(r1) +; CHECK-NEXT: vmrghb v4, v2, v2 +; CHECK-NEXT: vmrglb v3, v2, v2 +; CHECK-NEXT: stxv v2, 320(r1) +; CHECK-NEXT: xxspltiw v2, 524296 +; CHECK-NEXT: stxv v5, 336(r1) +; CHECK-NEXT: stxv v0, 368(r1) +; CHECK-NEXT: stxv v1, 352(r1) +; CHECK-NEXT: stxv v6, 400(r1) +; CHECK-NEXT: stxv v7, 384(r1) +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vslh v3, v3, v2 +; CHECK-NEXT: vsrh v4, v4, v2 +; CHECK-NEXT: vsrh v3, v3, v2 +; CHECK-NEXT: stxv v4, 112(r1) +; CHECK-NEXT: vmrglb v4, v5, v5 +; CHECK-NEXT: stxv v3, 96(r1) +; CHECK-NEXT: vmrglb v3, v1, v1 +; CHECK-NEXT: li r3, 97 +; CHECK-NEXT: vmrghb v5, v5, v5 +; CHECK-NEXT: lha r5, 96(r1) +; CHECK-NEXT: sth r3, 288(r1) +; CHECK-NEXT: pli r3, 6488162 +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vslh v3, v3, v2 +; CHECK-NEXT: stw r3, 290(r1) +; CHECK-NEXT: li r3, 100 +; CHECK-NEXT: vslh v5, v5, v2 +; CHECK-NEXT: vsrh v4, v4, v2 +; CHECK-NEXT: vsrh v3, v3, v2 +; CHECK-NEXT: sth r3, 294(r1) +; CHECK-NEXT: lbz r3, 320(r1) +; CHECK-NEXT: vsrh v5, v5, v2 +; CHECK-NEXT: stxv v4, 128(r1) +; CHECK-NEXT: vmrghb v4, v1, v1 +; CHECK-NEXT: stxv v3, 160(r1) +; CHECK-NEXT: vmrglb v3, v0, v0 +; CHECK-NEXT: stxv v5, 144(r1) +; CHECK-NEXT: extsb r4, r3 +; CHECK-NEXT: paddi r3, 0, .L.str@PCREL, 1 +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vslh v3, v3, v2 +; CHECK-NEXT: vsrh v4, v4, v2 +; CHECK-NEXT: vsrh v3, v3, v2 +; CHECK-NEXT: stxv v4, 176(r1) +; CHECK-NEXT: vmrghb v4, v0, v0 +; CHECK-NEXT: stxv v3, 192(r1) +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vsrh v4, v4, v2 +; CHECK-NEXT: stxv v4, 208(r1) +; CHECK-NEXT: vmrglb v4, v7, v7 +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vsrh v3, v4, v2 +; CHECK-NEXT: vmrghb v4, v7, v7 +; CHECK-NEXT: stxv v3, 224(r1) +; CHECK-NEXT: vmrglb 
v3, v6, v6 +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vsrh v4, v4, v2 +; CHECK-NEXT: vslh v3, v3, v2 +; CHECK-NEXT: stxv v4, 240(r1) +; CHECK-NEXT: vmrghb v4, v6, v6 +; CHECK-NEXT: vsrh v3, v3, v2 +; CHECK-NEXT: stxv v3, 256(r1) +; CHECK-NEXT: vslh v4, v4, v2 +; CHECK-NEXT: vsrh v2, v4, v2 +; CHECK-NEXT: stxv v2, 272(r1) +; CHECK-NEXT: bl printf@notoc +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: mr r1, r30 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: test: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: clrldi r0, r1, 59 +; CHECK-BE-NEXT: subfic r0, r0, -480 +; CHECK-BE-NEXT: stdux r1, r1, r0 +; CHECK-BE-NEXT: addi r3, r1, 128 +; CHECK-BE-NEXT: li r4, 0 +; CHECK-BE-NEXT: li r5, 200 +; CHECK-BE-NEXT: bl memset +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_1@toc@ha +; CHECK-BE-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_1@toc@l +; CHECK-BE-NEXT: addi r4, r4, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_2@toc@ha +; CHECK-BE-NEXT: lxvx v3, 0, r4 +; CHECK-BE-NEXT: addis r4, r2, .LCPI0_3@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_2@toc@l +; CHECK-BE-NEXT: addi r4, r4, .LCPI0_3@toc@l +; CHECK-BE-NEXT: lxvx v4, 0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_4@toc@ha +; CHECK-BE-NEXT: lxvx v5, 0, r4 +; CHECK-BE-NEXT: addis r4, r2, .LCPI0_5@toc@ha +; CHECK-BE-NEXT: vmrglb v1, v2, v2 +; CHECK-BE-NEXT: vmrghb v0, v2, v2 +; CHECK-BE-NEXT: stxv v2, 352(r1) +; CHECK-BE-NEXT: xxspltiw v2, 524296 +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_4@toc@l +; CHECK-BE-NEXT: addi r4, r4, .LCPI0_5@toc@l +; CHECK-BE-NEXT: lxvx v6, 0, r3 +; CHECK-BE-NEXT: pli r3, 1633837924 +; CHECK-BE-NEXT: vslh v1, v1, v2 +; CHECK-BE-NEXT: vslh v0, v0, v2 +; CHECK-BE-NEXT: lxvx v7, 0, r4 +; CHECK-BE-NEXT: stw r3, 448(r1) +; 
CHECK-BE-NEXT: stxv v4, 400(r1) +; CHECK-BE-NEXT: stxv v5, 384(r1) +; CHECK-BE-NEXT: stxv v3, 368(r1) +; CHECK-BE-NEXT: vsrh v1, v1, v2 +; CHECK-BE-NEXT: vsrh v0, v0, v2 +; CHECK-BE-NEXT: stxv v0, 128(r1) +; CHECK-BE-NEXT: vmrghb v0, v5, v5 +; CHECK-BE-NEXT: vmrglb v5, v5, v5 +; CHECK-BE-NEXT: stxv v1, 144(r1) +; CHECK-BE-NEXT: vmrghb v1, v3, v3 +; CHECK-BE-NEXT: vmrglb v3, v3, v3 +; CHECK-BE-NEXT: li r3, 97 +; CHECK-BE-NEXT: stxv v6, 432(r1) +; CHECK-BE-NEXT: stxv v7, 416(r1) +; CHECK-BE-NEXT: lha r5, 128(r1) +; CHECK-BE-NEXT: vslh v0, v0, v2 +; CHECK-BE-NEXT: sth r3, 320(r1) +; CHECK-BE-NEXT: pli r3, 6422627 +; CHECK-BE-NEXT: vslh v5, v5, v2 +; CHECK-BE-NEXT: vslh v3, v3, v2 +; CHECK-BE-NEXT: stw r3, 322(r1) +; CHECK-BE-NEXT: li r3, 100 +; CHECK-BE-NEXT: vslh v1, v1, v2 +; CHECK-BE-NEXT: vsrh v5, v5, v2 +; CHECK-BE-NEXT: vsrh v3, v3, v2 +; CHECK-BE-NEXT: sth r3, 326(r1) +; CHECK-BE-NEXT: lbz r3, 352(r1) +; CHECK-BE-NEXT: vsrh v1, v1, v2 +; CHECK-BE-NEXT: stxv v5, 208(r1) +; CHECK-BE-NEXT: stxv v3, 176(r1) +; CHECK-BE-NEXT: vsrh v3, v0, v2 +; CHECK-BE-NEXT: stxv v1, 160(r1) +; CHECK-BE-NEXT: stxv v3, 192(r1) +; CHECK-BE-NEXT: vmrghb v3, v4, v4 +; CHECK-BE-NEXT: vmrglb v4, v4, v4 +; CHECK-BE-NEXT: extsb r4, r3 +; CHECK-BE-NEXT: addis r3, r2, .L.str@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .L.str@toc@l +; CHECK-BE-NEXT: vslh v4, v4, v2 +; CHECK-BE-NEXT: vslh v3, v3, v2 +; CHECK-BE-NEXT: vsrh v4, v4, v2 +; CHECK-BE-NEXT: vsrh v3, v3, v2 +; CHECK-BE-NEXT: stxv v4, 240(r1) +; CHECK-BE-NEXT: vmrghb v4, v7, v7 +; CHECK-BE-NEXT: stxv v3, 224(r1) +; CHECK-BE-NEXT: vslh v4, v4, v2 +; CHECK-BE-NEXT: vsrh v3, v4, v2 +; CHECK-BE-NEXT: vmrglb v4, v7, v7 +; CHECK-BE-NEXT: stxv v3, 256(r1) +; CHECK-BE-NEXT: vmrghb v3, v6, v6 +; CHECK-BE-NEXT: vslh v4, v4, v2 +; CHECK-BE-NEXT: vsrh v4, v4, v2 +; CHECK-BE-NEXT: vslh v3, v3, v2 +; CHECK-BE-NEXT: stxv v4, 272(r1) +; CHECK-BE-NEXT: vmrglb v4, v6, v6 +; CHECK-BE-NEXT: vsrh v3, v3, v2 +; CHECK-BE-NEXT: stxv v3, 288(r1) +; CHECK-BE-NEXT: 
vslh v4, v4, v2 +; CHECK-BE-NEXT: vsrh v2, v4, v2 +; CHECK-BE-NEXT: stxv v2, 304(r1) +; CHECK-BE-NEXT: bl printf +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: li r3, 0 +; CHECK-BE-NEXT: mr r1, r30 +; CHECK-BE-NEXT: ld r0, 16(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: mtlr r0 +; CHECK-BE-NEXT: blr +entry: + + %Arr1 = alloca [100 x i8], align 32 + %Arr2 = alloca [100 x i16], align 32 + %Arr235 = bitcast [100 x i16]* %Arr2 to i8* + %0 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 100, i8* nonnull %0) + call void @llvm.lifetime.start.p0i8(i64 200, i8* nonnull %Arr235) + call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(200) %Arr235, i8 0, i64 200, i1 false) + %1 = bitcast [100 x i8]* %Arr1 to <32 x i8>* + store <32 x i8> , <32 x i8>* %1, align 32 + %2 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 32 + %3 = bitcast i8* %2 to <32 x i8>* + store <32 x i8> , <32 x i8>* %3, align 32 + %4 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 64 + %5 = bitcast i8* %4 to <32 x i8>* + store <32 x i8> , <32 x i8>* %5, align 32 + %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 96 + store i8 97, i8* %arrayidx, align 32 + %arrayidx.1 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 97 + store i8 98, i8* %arrayidx.1, align 1 + %arrayidx.2 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 98 + store i8 99, i8* %arrayidx.2, align 2 + %arrayidx.3 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 99 + store i8 100, i8* %arrayidx.3, align 1 + %6 = bitcast [100 x i8]* %Arr1 to <16 x i8>* + %wide.load = load <16 x i8>, <16 x i8>* %6, align 32 + %7 = sext <16 x i8> %wide.load to <16 x i16> + %8 = bitcast [100 x i16]* %Arr2 to <16 x i16>* + store <16 x i16> %7, <16 x i16>* %8, align 32 + %9 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 16 + %10 = bitcast i8* %9 to <16 
x i8>* + %wide.load.1 = load <16 x i8>, <16 x i8>* %10, align 16 + %11 = sext <16 x i8> %wide.load.1 to <16 x i16> + %12 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 16 + %13 = bitcast i16* %12 to <16 x i16>* + store <16 x i16> %11, <16 x i16>* %13, align 32 + %14 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 32 + %15 = bitcast i8* %14 to <16 x i8>* + %wide.load.2 = load <16 x i8>, <16 x i8>* %15, align 32 + %16 = sext <16 x i8> %wide.load.2 to <16 x i16> + %17 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 32 + %18 = bitcast i16* %17 to <16 x i16>* + store <16 x i16> %16, <16 x i16>* %18, align 32 + %19 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 48 + %20 = bitcast i8* %19 to <16 x i8>* + %wide.load.3 = load <16 x i8>, <16 x i8>* %20, align 16 + %21 = sext <16 x i8> %wide.load.3 to <16 x i16> + %22 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 48 + %23 = bitcast i16* %22 to <16 x i16>* + store <16 x i16> %21, <16 x i16>* %23, align 32 + %24 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 64 + %25 = bitcast i8* %24 to <16 x i8>* + %wide.load.4 = load <16 x i8>, <16 x i8>* %25, align 32 + %26 = sext <16 x i8> %wide.load.4 to <16 x i16> + %27 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 64 + %28 = bitcast i16* %27 to <16 x i16>* + store <16 x i16> %26, <16 x i16>* %28, align 32 + %29 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 80 + %30 = bitcast i8* %29 to <16 x i8>* + %wide.load.5 = load <16 x i8>, <16 x i8>* %30, align 16 + %31 = sext <16 x i8> %wide.load.5 to <16 x i16> + %32 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 80 + %33 = bitcast i16* %32 to <16 x i16>* + store <16 x i16> %31, <16 x i16>* %33, align 32 + %arrayidx10 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 96 + %34 = load i8, i8* %arrayidx10, align 32 + %conv11 = sext i8 %34 to 
i16 + %arrayidx13 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 96 + store i16 %conv11, i16* %arrayidx13, align 32 + %arrayidx10.1 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 97 + %35 = load i8, i8* %arrayidx10.1, align 1 + %conv11.1 = sext i8 %35 to i16 + %arrayidx13.1 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 97 + store i16 %conv11.1, i16* %arrayidx13.1, align 2 + %arrayidx10.2 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 98 + %36 = load i8, i8* %arrayidx10.2, align 2 + %conv11.2 = sext i8 %36 to i16 + %arrayidx13.2 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 98 + store i16 %conv11.2, i16* %arrayidx13.2, align 4 + %arrayidx10.3 = getelementptr inbounds [100 x i8], [100 x i8]* %Arr1, i64 0, i64 99 + %37 = load i8, i8* %arrayidx10.3, align 1 + %conv11.3 = sext i8 %37 to i16 + %arrayidx13.3 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 99 + store i16 %conv11.3, i16* %arrayidx13.3, align 2 + %38 = load i8, i8* %0, align 32 + %conv18 = sext i8 %38 to i32 + %arrayidx19 = getelementptr inbounds [100 x i16], [100 x i16]* %Arr2, i64 0, i64 0 + %39 = load i16, i16* %arrayidx19, align 32 + %conv20 = sext i16 %39 to i32 + %call = tail call signext i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i32 signext %conv18, i32 signext %conv20) + call void @llvm.lifetime.end.p0i8(i64 200, i8* nonnull %Arr235) + call void @llvm.lifetime.end.p0i8(i64 100, i8* nonnull %0) + ret i32 0 +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) nounwind + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) nounwind + +declare noundef signext i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr nounwind + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) nounwind