Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1013,19 +1013,36 @@
 
 /// Check if it is legal to scalarize a memory access to \p VecTy at index \p
 /// Idx. \p Idx must access a valid vector element.
-static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
+static ScalarizationResult canScalarizeAccess(VectorType *VecTy,
                                               Value *Idx, Instruction *CtxI,
                                               AssumptionCache &AC,
                                               const DominatorTree &DT) {
+  // This is the number of elements of fixed vector types,
+  // or the minimum number of elements of scalable vector types.
+  uint64_t NumElements;
+
+  // Fixed vector types can be checked directly; accesses to scalable vector
+  // types are only scalarized on little-endian targets.
+  if (isa<FixedVectorType>(VecTy)) {
+    auto *FixedVecTy = cast<FixedVectorType>(VecTy);
+    NumElements = FixedVecTy->getNumElements();
+  } else {
+    const DataLayout &DL = CtxI->getModule()->getDataLayout();
+    if (DL.isBigEndian())
+      return ScalarizationResult::unsafe();
+    auto *ScalableVecTy = cast<ScalableVectorType>(VecTy);
+    NumElements = ScalableVecTy->getMinNumElements();
+  }
+
   if (auto *C = dyn_cast<ConstantInt>(Idx)) {
-    if (C->getValue().ult(VecTy->getNumElements()))
+    if (C->getValue().ult(NumElements))
       return ScalarizationResult::safe();
     return ScalarizationResult::unsafe();
   }
 
   unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
   APInt Zero(IntWidth, 0);
-  APInt MaxElts(IntWidth, VecTy->getNumElements());
+  APInt MaxElts(IntWidth, NumElements);
   ConstantRange ValidIndices(Zero, MaxElts);
   ConstantRange IdxRange(IntWidth, true);
 
@@ -1075,7 +1092,7 @@
 bool VectorCombine::foldSingleElementStore(Instruction &I) {
   auto *SI = cast<StoreInst>(&I);
   if (!SI->isSimple() ||
-      !isa<FixedVectorType>(SI->getValueOperand()->getType()))
+      !isa<VectorType>(SI->getValueOperand()->getType()))
     return false;
 
   // TODO: Combine more complicated patterns (multiple insert) by referencing
@@ -1089,7 +1106,7 @@
     return false;
 
   if (auto *Load = dyn_cast<LoadInst>(Source)) {
-    auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
+    auto VecTy = cast<VectorType>(SI->getValueOperand()->getType());
     const DataLayout &DL = I.getModule()->getDataLayout();
     Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
     // Don't optimize for atomic/volatile load or store. Ensure memory is not
Index: llvm/test/Transforms/VectorCombine/load-insert-store.ll
===================================================================
--- llvm/test/Transforms/VectorCombine/load-insert-store.ll
+++ llvm/test/Transforms/VectorCombine/load-insert-store.ll
@@ -47,12 +47,18 @@
 }
 
 define void @insert_store_vscale(ptr %q, i16 zeroext %s) {
-; CHECK-LABEL: @insert_store_vscale(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
-; CHECK-NEXT:    store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
-; CHECK-NEXT:    ret void
+; LE-LABEL: @insert_store_vscale(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 8 x i16>, ptr [[Q:%.*]], i32 0, i32 3
+; LE-NEXT:    store i16 [[S:%.*]], ptr [[TMP0]], align 2
+; LE-NEXT:    ret void
+;
+; BE-LABEL: @insert_store_vscale(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[TMP0:%.*]] = load <vscale x 8 x i16>, ptr [[Q:%.*]], align 16
+; BE-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 8 x i16> [[TMP0]], i16 [[S:%.*]], i32 3
+; BE-NEXT:    store <vscale x 8 x i16> [[VECINS]], ptr [[Q]], align 16
+; BE-NEXT:    ret void
 ;
 entry:
   %0 = load <vscale x 8 x i16>, ptr %q
@@ -247,14 +253,22 @@
 ; To verify the index is not a constant but valid by assume,
 ; for scalable vector types.
 define void @insert_store_vscale_nonconst_index_known_valid_by_assume(ptr %q, i8 zeroext %s, i32 %idx) {
-; CHECK-LABEL: @insert_store_vscale_nonconst_index_known_valid_by_assume(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
-; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
-; CHECK-NEXT:    store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
-; CHECK-NEXT:    ret void
+; LE-LABEL: @insert_store_vscale_nonconst_index_known_valid_by_assume(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
+; LE-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; LE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX]]
+; LE-NEXT:    store i8 [[S:%.*]], ptr [[TMP0]], align 1
+; LE-NEXT:    ret void
+;
+; BE-LABEL: @insert_store_vscale_nonconst_index_known_valid_by_assume(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IDX:%.*]], 4
+; BE-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; BE-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; BE-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX]]
+; BE-NEXT:    store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; BE-NEXT:    ret void
 ;
 entry:
   %cmp = icmp ult i32 %idx, 4
@@ -349,13 +363,20 @@
 ; To verify the index is not a constant but valid by and,
 ; for scalable vector types.
 define void @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(ptr %q, i8 zeroext %s, i32 noundef %idx) {
-; CHECK-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT:    store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
-; CHECK-NEXT:    ret void
+; LE-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
+; LE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; LE-NEXT:    store i8 [[S:%.*]], ptr [[TMP0]], align 1
+; LE-NEXT:    ret void
+;
+; BE-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_and(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; BE-NEXT:    [[IDX_CLAMPED:%.*]] = and i32 [[IDX:%.*]], 7
+; BE-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; BE-NEXT:    store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; BE-NEXT:    ret void
 ;
 entry:
   %0 = load <vscale x 16 x i8>, ptr %q
@@ -491,13 +512,20 @@
 ; To verify the index is not a constant but valid by urem,
 ; for scalable vector types.
 define void @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(ptr %q, i8 zeroext %s, i32 noundef %idx) {
-; CHECK-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
-; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
-; CHECK-NEXT:    store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
-; CHECK-NEXT:    ret void
+; LE-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
+; LE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[Q:%.*]], i32 0, i32 [[IDX_CLAMPED]]
+; LE-NEXT:    store i8 [[S:%.*]], ptr [[TMP0]], align 1
+; LE-NEXT:    ret void
+;
+; BE-LABEL: @insert_store_vscale_nonconst_index_known_noundef_and_valid_by_urem(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[TMP0:%.*]] = load <vscale x 16 x i8>, ptr [[Q:%.*]], align 16
+; BE-NEXT:    [[IDX_CLAMPED:%.*]] = urem i32 [[IDX:%.*]], 16
+; BE-NEXT:    [[VECINS:%.*]] = insertelement <vscale x 16 x i8> [[TMP0]], i8 [[S:%.*]], i32 [[IDX_CLAMPED]]
+; BE-NEXT:    store <vscale x 16 x i8> [[VECINS]], ptr [[Q]], align 16
+; BE-NEXT:    ret void
 ;
 entry:
   %0 = load <vscale x 16 x i8>, ptr %q
@@ -818,6 +846,3 @@
 
 declare i32 @bar(i32, i1) readonly
 declare double @llvm.log2.f64(double)
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; BE: {{.*}}
-; LE: {{.*}}
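For reference, a minimal before/after sketch of the rewrite this patch enables
on little-endian targets (the function and value names below are illustrative,
not taken from the patch):

  ; Before: the single-element store round-trips the whole scalable vector.
  define void @example(ptr %p, i16 %v) {
    %vec = load <vscale x 8 x i16>, ptr %p, align 16
    %ins = insertelement <vscale x 8 x i16> %vec, i16 %v, i32 3
    store <vscale x 8 x i16> %ins, ptr %p, align 16
    ret void
  }

  ; After: a single scalar store. On little-endian targets element 3 sits at a
  ; fixed byte offset from %p regardless of the runtime vscale, so the GEP is
  ; well defined even though the total vector length is unknown at compile
  ; time; this is why canScalarizeAccess bails out on big-endian layouts.
  define void @example(ptr %p, i16 %v) {
    %elt = getelementptr inbounds <vscale x 8 x i16>, ptr %p, i32 0, i32 3
    store i16 %v, ptr %elt, align 2
    ret void
  }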