[LoopVectorize] Make hasIrregularType independent of the vectorization factor.

hasIrregularType now only checks whether the scalar type's allocated size in
bits differs from its size in bits, so the VF parameter is dropped from the
helper and from its two callers. A test with i7 loads/stores is added.

Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -373,18 +373,10 @@
 /// A helper function that returns true if the given type is irregular. The
-/// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type at the given vectorization factor.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
-  // Determine if an array of VF elements of type Ty is "bitcast compatible"
-  // with a <VF x Ty> vector.
-  if (VF.isVector()) {
-    auto *VectorTy = VectorType::get(Ty, VF);
-    return TypeSize::get(VF.getKnownMinValue() *
-                             DL.getTypeAllocSize(Ty).getFixedValue(),
-                         VF.isScalable()) != DL.getTypeStoreSize(VectorTy);
-  }
-
-  // If the vectorization factor is one, we just check if an array of type Ty
-  // requires padding between elements.
+/// type is irregular if its allocated size doesn't equal its size in bits,
+/// i.e. an array of the type has padding between its elements.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+  // Determine if an array of N elements of type Ty is "bitcast compatible"
+  // with a <N x Ty> vector.
+  // This is only true if there is no padding between the array elements.
   return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
 }
 
@@ -5087,7 +5079,7 @@
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = getMemInstValueType(I);
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   // Check if masking is required.
@@ -5134,7 +5126,7 @@
   // requires padding and will be scalarized.
   auto &DL = I->getModule()->getDataLayout();
   auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
-  if (hasIrregularType(ScalarTy, DL, VF))
+  if (hasIrregularType(ScalarTy, DL))
     return false;
 
   return true;
Index: llvm/test/Transforms/LoopVectorize/irregular_type.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/irregular_type.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+; Check that loads/stores of i7 are not widened to <4 x i7>: i7 is an irregular
+; type (its allocated size in bits differs from its size in bits), so these
+; accesses are expected to be scalarized in the vector body.
+
+; CHECK: foo
+; CHECK: vector.body
+; CHECK-NOT: load <4 x i7>
+; CHECK-NOT: store <4 x i7>
+; CHECK: for.body
+define void @foo(i7* %a, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv
+  %0 = load i7, i7* %arrayidx, align 1
+  %sub = add nuw nsw i7 %0, 0
+  store i7 %sub, i7* %arrayidx, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp eq i64 %indvars.iv.next, %n
+  br i1 %cmp, label %for.exit, label %for.body
+
+for.exit:
+  ret void
+}