diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -347,6 +347,20 @@
     ElementCount EC = EI.getVectorOperandType()->getElementCount();
     unsigned NumElts = EC.getKnownMinValue();
 
+    // Fold extractelement (stepvector), C --> C: lane C of a step vector
+    // holds the value C, so the extract becomes a constant of the element type.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
+      Intrinsic::ID IID = II->getIntrinsicID();
+      // Index needs to be lower than the minimum size of the vector, because
+      // for scalable vector, the vector size is known at run time.
+      if (IID == Intrinsic::experimental_stepvector &&
+          IndexC->getValue().ult(NumElts)) {
+        Type *Ty = EI.getType();
+        auto *Idx = ConstantInt::get(
+            Ty, IndexC->getValue().zextOrTrunc(Ty->getIntegerBitWidth()));
+        return replaceInstUsesWith(EI, Idx);
+      }
+    }
     // InstSimplify should handle cases where the index is invalid.
     // For fixed-length vector, it's invalid to extract out-of-range element.
     if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
diff --git a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
--- a/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/vscale_extractelement.ll
@@ -183,3 +183,69 @@
   %E = extractelement <vscale x 4 x i32> %vec_int, i32 2147483647
   ret i32 %E
 }
+
+; Step vector optimization
+
+define i64 @ext_lane0_from_stepvec() {
+; CHECK-LABEL: @ext_lane0_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 0
+  ret i64 %1
+}
+
+define i32 @ext_lane3_from_stepvec() {
+; CHECK-LABEL: @ext_lane3_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 3
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %1 = extractelement <vscale x 4 x i32> %0, i64 3
+  ret i32 %1
+}
+
+define i64 @ext_lane_out_of_range_from_stepvec() {
+; CHECK-LABEL: @ext_lane_out_of_range_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <vscale x 4 x i64> [[TMP0]], i32 5
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 5
+  ret i64 %1
+}
+
+define i64 @ext_lane_invalid_from_stepvec() {
+; CHECK-LABEL: @ext_lane_invalid_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <vscale x 4 x i64> [[TMP0]], i32 -1
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 -1
+  ret i64 %1
+}
+
+define i64 @ext_lane_unknown_from_stepvec(i32 %v) {
+; CHECK-LABEL: @ext_lane_unknown_from_stepvec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <vscale x 4 x i64> [[TMP0]], i32 [[V:%.*]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %1 = extractelement <vscale x 4 x i64> %0, i32 %v
+  ret i64 %1
+}
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64();
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32();