Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsARM.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -1437,6 +1438,11 @@
   case Intrinsic::experimental_vector_reduce_smax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_umax:
+  // Target intrinsics
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64:
     return true;

   // Floating point operations cannot be folded in strictfp functions in
@@ -2700,7 +2706,8 @@
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = FVTy->getElementType();

-  if (IntrinsicID == Intrinsic::masked_load) {
+  switch (IntrinsicID) {
+  case Intrinsic::masked_load: {
     auto *SrcPtr = Operands[0];
     auto *Mask = Operands[2];
     auto *Passthru = Operands[3];
@@ -2738,6 +2745,32 @@
       return nullptr;
     return ConstantVector::get(NewElements);
   }
+  case Intrinsic::arm_mve_vctp8:
+  case Intrinsic::arm_mve_vctp16:
+  case Intrinsic::arm_mve_vctp32:
+  case Intrinsic::arm_mve_vctp64: {
+    if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Limit = Op->getZExtValue();
+      // vctp64 are currently modelled as returning a v4i1, not a v2i1. Make
+      // sure we get the limit right in that case and set all relevant lanes.
+      if (IntrinsicID == Intrinsic::arm_mve_vctp64)
+        Limit *= 2;
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (i < Limit)
+          NCs.push_back(ConstantInt::get(Ty, 1));
+        else
+          NCs.push_back(ConstantInt::get(Ty, 0));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
+  default:
+    break;
+  }

   for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
Index: llvm/test/Analysis/ConstantFolding/ARM/lit.local.cfg
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ConstantFolding/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
Index: llvm/test/Analysis/ConstantFolding/ARM/mve-vctp.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/ConstantFolding/ARM/mve-vctp.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instsimplify -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define <16 x i1> @vctp8_0() {
+; CHECK-LABEL: @vctp8_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 0)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_1() {
+; CHECK-LABEL: @vctp8_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 1)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_8() {
+; CHECK-LABEL: @vctp8_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 8)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_15() {
+; CHECK-LABEL: @vctp8_15(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 15)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_16() {
+; CHECK-LABEL: @vctp8_16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 16)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_100() {
+; CHECK-LABEL: @vctp8_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 100)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @vctp8_m1() {
+; CHECK-LABEL: @vctp8_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.arm.mve.vctp8(i32 -1)
+  ret <16 x i1> %int
+}
+
+
+
+define <8 x i1> @vctp16_0() {
+; CHECK-LABEL: @vctp16_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 0)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_1() {
+; CHECK-LABEL: @vctp16_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 1)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_4() {
+; CHECK-LABEL: @vctp16_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 4)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_7() {
+; CHECK-LABEL: @vctp16_7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 7)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_8() {
+; CHECK-LABEL: @vctp16_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 8)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_100() {
+; CHECK-LABEL: @vctp16_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 100)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @vctp16_m1() {
+; CHECK-LABEL: @vctp16_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.arm.mve.vctp16(i32 -1)
+  ret <8 x i1> %int
+}
+
+
+
+define <4 x i1> @vctp32_0() {
+; CHECK-LABEL: @vctp32_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 0)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_1() {
+; CHECK-LABEL: @vctp32_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_3() {
+; CHECK-LABEL: @vctp32_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 3)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_4() {
+; CHECK-LABEL: @vctp32_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 4)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_100() {
+; CHECK-LABEL: @vctp32_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 100)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp32_m1() {
+; CHECK-LABEL: @vctp32_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp32(i32 -1)
+  ret <4 x i1> %int
+}
+
+
+
+define <4 x i1> @vctp64_0() {
+; CHECK-LABEL: @vctp64_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 0)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_1() {
+; CHECK-LABEL: @vctp64_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_2() {
+; CHECK-LABEL: @vctp64_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 2)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_100() {
+; CHECK-LABEL: @vctp64_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 100)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @vctp64_m1() {
+; CHECK-LABEL: @vctp64_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.arm.mve.vctp64(i32 -1)
+  ret <4 x i1> %int
+}
+
+
+
+declare <4 x i1> @llvm.arm.mve.vctp64(i32)
+declare <4 x i1> @llvm.arm.mve.vctp32(i32)
+declare <8 x i1> @llvm.arm.mve.vctp16(i32)
+declare <16 x i1> @llvm.arm.mve.vctp8(i32)