diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1456,6 +1456,7 @@
   case Intrinsic::launder_invariant_group:
   case Intrinsic::strip_invariant_group:
   case Intrinsic::masked_load:
+  case Intrinsic::get_active_lane_mask:
   case Intrinsic::abs:
   case Intrinsic::smax:
   case Intrinsic::smin:
@@ -2927,6 +2928,27 @@
     }
     break;
   }
+  case Intrinsic::get_active_lane_mask: {
+    // Fold llvm.get.active.lane.mask(Base, Limit) when both operands are
+    // constants: lane i of the result is true iff Base + i < Limit (unsigned).
+    auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
+    auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
+    if (Op0 && Op1) {
+      unsigned Lanes = FVTy->getNumElements();
+      uint64_t Base = Op0->getZExtValue();
+      uint64_t Limit = Op1->getZExtValue();
+
+      SmallVector<Constant *, 16> NCs;
+      for (unsigned i = 0; i < Lanes; i++) {
+        if (Base + i < Limit)
+          NCs.push_back(ConstantInt::getTrue(Ty));
+        else
+          NCs.push_back(ConstantInt::getFalse(Ty));
+      }
+      return ConstantVector::get(NCs);
+    }
+    break;
+  }
   default:
     break;
   }
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll
@@ -0,0 +1,300 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instsimplify -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define <16 x i1> @v16i1_0() {
+; CHECK-LABEL: @v16i1_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 0)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_1() {
+; CHECK-LABEL: @v16i1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 1)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_8() {
+; CHECK-LABEL: @v16i1_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 8)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_15() {
+; CHECK-LABEL: @v16i1_15(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 15)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_16() {
+; CHECK-LABEL: @v16i1_16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_100() {
+; CHECK-LABEL: @v16i1_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_m1() {
+; CHECK-LABEL: @v16i1_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_10_11() {
+; CHECK-LABEL: @v16i1_10_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 10, i32 11)
+  ret <16 x i1> %int
+}
+
+define <16 x i1> @v16i1_12_11() {
+; CHECK-LABEL: @v16i1_12_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <16 x i1> zeroinitializer
+;
+entry:
+  %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 12, i32 11)
+  ret <16 x i1> %int
+}
+
+
+
+define <8 x i1> @v8i1_0() {
+; CHECK-LABEL: @v8i1_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 0)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_1() {
+; CHECK-LABEL: @v8i1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 1)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_4() {
+; CHECK-LABEL: @v8i1_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 4)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_7() {
+; CHECK-LABEL: @v8i1_7(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 7)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_8() {
+; CHECK-LABEL: @v8i1_8(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_100() {
+; CHECK-LABEL: @v8i1_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_m1() {
+; CHECK-LABEL: @v8i1_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_10_11() {
+; CHECK-LABEL: @v8i1_10_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 10, i32 11)
+  ret <8 x i1> %int
+}
+
+define <8 x i1> @v8i1_12_11() {
+; CHECK-LABEL: @v8i1_12_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <8 x i1> zeroinitializer
+;
+entry:
+  %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 12, i32 11)
+  ret <8 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_0() {
+; CHECK-LABEL: @v4i1_0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 0)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_1() {
+; CHECK-LABEL: @v4i1_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_3() {
+; CHECK-LABEL: @v4i1_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_4() {
+; CHECK-LABEL: @v4i1_4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_100() {
+; CHECK-LABEL: @v4i1_100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_m1() {
+; CHECK-LABEL: @v4i1_m1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_10_11() {
+; CHECK-LABEL: @v4i1_10_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> <i1 true, i1 false, i1 false, i1 false>
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 10, i32 11)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_12_11() {
+; CHECK-LABEL: @v4i1_12_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret <4 x i1> zeroinitializer
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 12, i32 11)
+  ret <4 x i1> %int
+}
+
+
+
+define <4 x i1> @v4i1_nc1(i32 %x) {
+; CHECK-LABEL: @v4i1_nc1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[X:%.*]], i32 11)
+; CHECK-NEXT:    ret <4 x i1> [[INT]]
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %x, i32 11)
+  ret <4 x i1> %int
+}
+
+define <4 x i1> @v4i1_nc2(i32 %x) {
+; CHECK-LABEL: @v4i1_nc2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 [[X:%.*]])
+; CHECK-NEXT:    ret <4 x i1> [[INT]]
+;
+entry:
+  %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 %x)
+  ret <4 x i1> %int
+}
+
+
+
+
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)