Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -1689,6 +1689,25 @@
       if (BitWidth >= 32)
         Known.Zero.setBitsFrom(31);
       break;
+    case Intrinsic::vscale: {
+      if (!II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
+        break;
+
+      uint64_t MaxVScale = II->getFunction()
+                               ->getFnAttribute(Attribute::VScaleRange)
+                               .getVScaleRangeArgs()
+                               .second;
+
+      if (MaxVScale == 0)
+        break;
+
+      unsigned FirstZeroBit = 64 - countLeadingZeros(MaxVScale);
+      unsigned VScaleTypeSize = Q.DL.getTypeSizeInBits(II->getType()->getScalarType());
+      if (FirstZeroBit < VScaleTypeSize)
+        Known.Zero.setBitsFrom(FirstZeroBit);
+
+      break;
+    }
     }
   }
   break;
Index: llvm/test/Transforms/InstCombine/icmp-vscale.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/icmp-vscale.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i1 @ugt_vscale64_x_32() vscale_range(0,16) {
+; CHECK-LABEL: @ugt_vscale64_x_32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %vscale = call i64 @llvm.vscale.i64()
+  %num_els = shl i64 %vscale, 5
+  %res = icmp ugt i64 %num_els, 1024
+  ret i1 %res
+}
+
+define i1 @ugt_vscale64_x_31() vscale_range(0,16) {
+; CHECK-LABEL: @ugt_vscale64_x_31(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %vscale = call i64 @llvm.vscale.i64()
+  %num_els = mul i64 %vscale, 31
+  %res = icmp ugt i64 %num_els, 1024
+  ret i1 %res
+}
+
+define i1 @ugt_vscale16_x_32() vscale_range(0,16) {
+; CHECK-LABEL: @ugt_vscale16_x_32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %vscale = call i16 @llvm.vscale.i16()
+  %num_els = shl i16 %vscale, 5
+  %res = icmp ugt i16 %num_els, 1024
+  ret i1 %res
+}
+
+define i1 @ult_vscale16() vscale_range(0,16) {
+; CHECK-LABEL: @ult_vscale16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %vscale = call i16 @llvm.vscale.i16()
+  %res = icmp ult i16 1024, %vscale
+  ret i1 %res
+}
+
+define i1 @ule_vscale64() vscale_range(0,16) {
+; CHECK-LABEL: @ule_vscale64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  %vscale = call i64 @llvm.vscale.i64()
+  %res = icmp ule i64 1024, %vscale
+  ret i1 %res
+}
+
+define i1 @ne_vscale64_x_32() vscale_range(0,16) {
+; CHECK-LABEL: @ne_vscale64_x_32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %vscale = call i64 @llvm.vscale.i64()
+  %num_els = mul i64 %vscale, 32
+  %res = icmp ne i64 %num_els, 39488
+  ret i1 %res
+}
+
+declare i8 @llvm.vscale.i8()
+declare i16 @llvm.vscale.i16()
+declare i32 @llvm.vscale.i32()
+declare i64 @llvm.vscale.i64()
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -17,8 +17,8 @@
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
 ; CHECK:         [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 2
-; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[TMP5]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[TMP5]], 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
 ; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 4 x i64> [[TMP8]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[POINTER_PHI]], <vscale x 4 x i64> [[VECTOR_GEP]]
@@ -80,16 +80,16 @@
 ; CHECK-NEXT:    %[[LPTR1:.*]] = bitcast i32* %[[LGEP1]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    %{{.*}} = load <vscale x 4 x i32>, <vscale x 4 x i32>* %[[LPTR1]], align 4
 ; CHECK-NEXT:    %[[VSCALE1:.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    %[[TMP1:.*]] = shl i32 %[[VSCALE1]], 2
-; CHECK-NEXT:    %[[TMP2:.*]] = sext i32 %[[TMP1]] to i64
+; CHECK-NEXT:    %[[TMP1:.*]] = shl nuw nsw i32 %[[VSCALE1]], 2
+; CHECK-NEXT:    %[[TMP2:.*]] = zext i32 %[[TMP1]] to i64
 ; CHECK-NEXT:    %[[LGEP2:.*]] = getelementptr i32, i32* %[[LGEP1]], i64 %[[TMP2]]
 ; CHECK-NEXT:    %[[LPTR2:.*]] = bitcast i32* %[[LGEP2]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    %{{.*}} = load <vscale x 4 x i32>, <vscale x 4 x i32>* %[[LPTR2]], align 4
 ; CHECK:         %[[SPTR1:.*]] = bitcast i32* %[[SGEP1]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    store <vscale x 4 x i32> %{{.*}}, <vscale x 4 x i32>* %[[SPTR1]], align 4
 ; CHECK-NEXT:    %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    %[[TMP3:.*]] = shl i32 %[[VSCALE2]], 2
-; CHECK-NEXT:    %[[TMP4:.*]] = sext i32 %[[TMP3]] to i64
+; CHECK-NEXT:    %[[TMP3:.*]] = shl nuw nsw i32 %[[VSCALE2]], 2
+; CHECK-NEXT:    %[[TMP4:.*]] = zext i32 %[[TMP3]] to i64
 ; CHECK-NEXT:    %[[SGEP2:.*]] = getelementptr i32, i32* %[[SGEP1]], i64 %[[TMP4]]
 ; CHECK-NEXT:    %[[SPTR2:.*]] = bitcast i32* %[[SGEP2]] to <vscale x 4 x i32>*
 ; CHECK-NEXT:    store <vscale x 4 x i32> %{{.*}}, <vscale x 4 x i32>* %[[SPTR2]], align 4
@@ -133,7 +133,7 @@
 ; CHECK-NEXT:    %[[APTRS1:.*]] = getelementptr i32, i32* %a, <vscale x 2 x i64> %[[VECIND1]]
 ; CHECK-NEXT:    %[[GEPA1:.*]] = getelementptr i32, i32* %a, i64 %[[IDX]]
 ; CHECK-NEXT:    %[[VSCALE64:.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    %[[VSCALE64X2:.*]] = shl i64 %[[VSCALE64]], 1
+; CHECK-NEXT:    %[[VSCALE64X2:.*]] = shl nuw nsw i64 %[[VSCALE64]], 1
 ; CHECK-NEXT:    %[[TMP3:.*]] = insertelement <vscale x 2 x i64> poison, i64 %[[VSCALE64X2]], i32 0
 ; CHECK-NEXT:    %[[TMP4:.*]] = shufflevector <vscale x 2 x i64> %[[TMP3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    %[[TMP5:.*]] = add <vscale x 2 x i64> %[[TMP4]], %[[STEPVEC]]
@@ -147,8 +147,8 @@
 ; CHECK:         %[[BPTR1:.*]] = bitcast i32** %[[GEPB1]] to <vscale x 2 x i32*>*
 ; CHECK-NEXT:    store <vscale x 2 x i32*> %[[APTRS1]], <vscale x 2 x i32*>* %[[BPTR1]], align 8
 ; CHECK:         %[[VSCALE32:.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    %[[VSCALE32X2:.*]] = shl i32 %[[VSCALE32]], 1
-; CHECK-NEXT:    %[[TMP6:.*]] = sext i32 %[[VSCALE32X2]] to i64
+; CHECK-NEXT:    %[[VSCALE32X2:.*]] = shl nuw nsw i32 %[[VSCALE32]], 1
+; CHECK-NEXT:    %[[TMP6:.*]] = zext i32 %[[VSCALE32X2]] to i64
 ; CHECK-NEXT:    %[[GEPB2:.*]] = getelementptr i32*, i32** %[[GEPB1]], i64 %[[TMP6]]
 ; CHECK-NEXT:    %[[BPTR2:.*]] = bitcast i32** %[[GEPB2]] to <vscale x 2 x i32*>*
 ; CHECK-NEXT    store <vscale x 2 x i32*> %[[APTRS2]], <vscale x 2 x i32*>* %[[BPTR2]], align 8
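
Note (not part of the patch): a minimal standalone sketch of the known-bits reasoning the ValueTracking hunk relies on. `significantBits` below is a hypothetical stand-in for `64 - countLeadingZeros(MaxVScale)`, and the numbers assume `vscale_range(0,16)` as used in the new tests; this is an illustration, not the committed code.

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for 64 - countLeadingZeros(Val): the number of bits
// needed to represent Val (0 when Val == 0).
static unsigned significantBits(uint64_t Val) {
  unsigned Bits = 0;
  for (; Val != 0; Val >>= 1)
    ++Bits;
  return Bits;
}

int main() {
  // vscale_range(0,16) on the caller means vscale is at most 16.
  const uint64_t MaxVScale = 16;

  // Every bit at or above FirstZeroBit is zero for any value <= MaxVScale,
  // so those bits of the llvm.vscale result can be marked known-zero.
  unsigned FirstZeroBit = significantBits(MaxVScale);           // 5 for 16
  uint64_t VScaleUpperBound = (uint64_t(1) << FirstZeroBit) - 1; // 31

  std::cout << "known zero from bit " << FirstZeroBit
            << ", so vscale <= " << VScaleUpperBound << '\n';

  // 'shl i64 %vscale, 5' shifts the known-zero region up by 5 bits: bits 10
  // and above are zero, i.e. the result is < 1024, which is why
  // 'icmp ugt i64 %num_els, 1024' folds to false in the tests above.
  std::cout << "vscale << 5 < " << (uint64_t(1) << (FirstZeroBit + 5)) << '\n';
  return 0;
}
```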