diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -434,11 +434,14 @@ m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask)))))) return false; - // Disallow non-vector casts and length-changing shuffles. + // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for + // scalable type is unknown; Second, we cannot reason if the narrowed shuffle + // mask for scalable type is a splat or not. + // 2) Disallow non-vector casts and length-changing shuffles. // TODO: We could allow any shuffle. - auto *DestTy = dyn_cast(I.getType()); - auto *SrcTy = cast(V->getType()); - if (!DestTy || I.getOperand(0)->getType() != SrcTy) + auto *DestTy = dyn_cast(I.getType()); + auto *SrcTy = dyn_cast(V->getType()); + if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy) return false; // The new shuffle must not cost more than the old shuffle. The bitcast is @@ -447,10 +450,8 @@ TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy)) return false; - // FIXME: it should be possible to implement the computation of the widened - // shuffle mask in terms of ElementCount to work with scalable shuffles. - unsigned DestNumElts = cast(DestTy)->getNumElements(); - unsigned SrcNumElts = cast(SrcTy)->getNumElements(); + unsigned DestNumElts = DestTy->getNumElements(); + unsigned SrcNumElts = SrcTy->getNumElements(); SmallVector NewMask; if (SrcNumElts <= DestNumElts) { // The bitcast is from wide to narrow/equal elements. 
The shuffle mask can diff --git a/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg b/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/vscale-bitcast-shuffle.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -vector-combine -S -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; This test checks we are not crashing with TTI when trying to get shuffle cost. +; This test also checks that the shuffle mask zeroinitializer is +; not narrowed into <0, 1, 0, 1, ...>, for which we cannot tell whether it is a +; valid splat or not. + +define @bitcast_shuffle( %a) { +; CHECK-LABEL: @bitcast_shuffle( +; CHECK-NEXT: [[I:%.*]] = shufflevector [[A:%.*]], undef, zeroinitializer +; CHECK-NEXT: [[R:%.*]] = bitcast [[I]] to +; CHECK-NEXT: ret [[R]] +; + %i = shufflevector %a, undef, zeroinitializer + %r = bitcast %i to + ret %r +}