diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h
--- a/llvm/lib/Target/VE/VETargetTransformInfo.h
+++ b/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -40,11 +40,28 @@
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
-  unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; }
+  unsigned getNumberOfRegisters(unsigned ClassID) const {
+    bool VectorRegs = (ClassID == 1);
+    if (VectorRegs) {
+      // TODO report vregs once vector isel is stable.
+      return 0;
+    }
 
-  unsigned getRegisterBitWidth(bool Vector) const { return 64; }
+    return 64;
+  }
 
-  unsigned getMinVectorRegisterBitWidth() const { return 64; }
+  unsigned getRegisterBitWidth(bool Vector) const {
+    if (Vector) {
+      // TODO report vregs once vector isel is stable.
+      return 0;
+    }
+    return 64;
+  }
+
+  unsigned getMinVectorRegisterBitWidth() const {
+    // TODO report vregs once vector isel is stable.
+    return 0;
+  }
 };
 
 } // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll b/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/VE/disable_lv.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -mtriple=ve-linux -S | FileCheck %s -check-prefix=VE
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-pc_linux -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX
+
+; Make sure LV does not trigger for VE on an appealing loop that vectorizes for x86 AVX.
+
+; TODO: Remove this test once VE vector isel is deemed stable.
+
+; VE-NOT: llvm.loop.isvectorized
+; AVX: llvm.loop.isvectorized
+
+define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32 signext %n) local_unnamed_addr {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %omp.inner.for.body.preheader, label %simd.if.end
+
+omp.inner.for.body.preheader:                     ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %omp.inner.for.body
+
+omp.inner.for.body:                               ; preds = %omp.inner.for.body.preheader, %omp.inner.for.body
+  %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !6
+  %mul6 = mul nsw i32 %0, 3
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %mul6, i32* %arrayidx8, align 4, !llvm.access.group !6
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !7
+
+simd.if.end:                                      ; preds = %omp.inner.for.body, %entry
+  ret void
+}
+
+!6 = distinct !{}
+!7 = distinct !{!7, !8, !9}
+!8 = !{!"llvm.loop.parallel_accesses", !6}
+!9 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/VE/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/VE/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/VE/lit.local.cfg
@@ -0,0 +1,4 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+if not 'VE' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll b/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/VE/disable_slp.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -basic-aa -slp-vectorizer -mtriple=ve-linux -S | FileCheck %s -check-prefix=VE
+; RUN: opt < %s -basic-aa -slp-vectorizer -mtriple=x86_64-pc_linux -mcpu=core-avx2 -S | FileCheck %s -check-prefix=SSE
+
+; Make sure SLP does not trigger for VE on an appealing set of combinable loads
+; and stores that vectorizes for x86 SSE.
+
+; TODO: Remove this test once VE vector isel is deemed stable.
+
+; VE-NOT: x double
+; SSE: x double
+
+define void @foo(double* noalias %A0p, double* noalias %B0p) {
+entry:
+  %A1p = getelementptr inbounds double, double* %A0p, i64 1
+  %A2p = getelementptr inbounds double, double* %A0p, i64 2
+  %A3p = getelementptr inbounds double, double* %A0p, i64 3
+  %A4p = getelementptr inbounds double, double* %A0p, i64 4
+  %A5p = getelementptr inbounds double, double* %A0p, i64 5
+  %A6p = getelementptr inbounds double, double* %A0p, i64 6
+  %A7p = getelementptr inbounds double, double* %A0p, i64 7
+  %A8p = getelementptr inbounds double, double* %A0p, i64 8
+  %A9p = getelementptr inbounds double, double* %A0p, i64 9
+  %A10p = getelementptr inbounds double, double* %A0p, i64 10
+  %A11p = getelementptr inbounds double, double* %A0p, i64 11
+  %A12p = getelementptr inbounds double, double* %A0p, i64 12
+  %A13p = getelementptr inbounds double, double* %A0p, i64 13
+  %A14p = getelementptr inbounds double, double* %A0p, i64 14
+  %A15p = getelementptr inbounds double, double* %A0p, i64 15
+  %A0 = load double, double* %A0p, align 8
+  %A1 = load double, double* %A1p, align 8
+  %A2 = load double, double* %A2p, align 8
+  %A3 = load double, double* %A3p, align 8
+  %A4 = load double, double* %A4p, align 8
+  %A5 = load double, double* %A5p, align 8
+  %A6 = load double, double* %A6p, align 8
+  %A7 = load double, double* %A7p, align 8
+  %A8 = load double, double* %A8p, align 8
+  %A9 = load double, double* %A9p, align 8
+  %A10 = load double, double* %A10p, align 8
+  %A11 = load double, double* %A11p, align 8
+  %A12 = load double, double* %A12p, align 8
+  %A13 = load double, double* %A13p, align 8
+  %A14 = load double, double* %A14p, align 8
+  %A15 = load double, double* %A15p, align 8
+  %B1p = getelementptr inbounds double, double* %B0p, i64 1
+  %B2p = getelementptr inbounds double, double* %B0p, i64 2
+  %B3p = getelementptr inbounds double, double* %B0p, i64 3
+  %B4p = getelementptr inbounds double, double* %B0p, i64 4
+  %B5p = getelementptr inbounds double, double* %B0p, i64 5
+  %B6p = getelementptr inbounds double, double* %B0p, i64 6
+  %B7p = getelementptr inbounds double, double* %B0p, i64 7
+  %B8p = getelementptr inbounds double, double* %B0p, i64 8
+  %B9p = getelementptr inbounds double, double* %B0p, i64 9
+  %B10p = getelementptr inbounds double, double* %B0p, i64 10
+  %B11p = getelementptr inbounds double, double* %B0p, i64 11
+  %B12p = getelementptr inbounds double, double* %B0p, i64 12
+  %B13p = getelementptr inbounds double, double* %B0p, i64 13
+  %B14p = getelementptr inbounds double, double* %B0p, i64 14
+  %B15p = getelementptr inbounds double, double* %B0p, i64 15
+  store double %A0, double* %B0p, align 8
+  store double %A1, double* %B1p, align 8
+  store double %A2, double* %B2p, align 8
+  store double %A3, double* %B3p, align 8
+  store double %A4, double* %B4p, align 8
+  store double %A5, double* %B5p, align 8
+  store double %A6, double* %B6p, align 8
+  store double %A7, double* %B7p, align 8
+  store double %A8, double* %B8p, align 8
+  store double %A9, double* %B9p, align 8
+  store double %A10, double* %B10p, align 8
+  store double %A11, double* %B11p, align 8
+  store double %A12, double* %B12p, align 8
+  store double %A13, double* %B13p, align 8
+  store double %A14, double* %B14p, align 8
+  store double %A15, double* %B15p, align 8
+  ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/VE/lit.local.cfg b/llvm/test/Transforms/SLPVectorizer/VE/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/VE/lit.local.cfg
@@ -0,0 +1,4 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+if not 'VE' in config.root.targets:
+    config.unsupported = True