Index: llvm/lib/CodeGen/InterleavedAccessPass.cpp =================================================================== --- llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -240,8 +240,10 @@ continue; } if (auto *BI = dyn_cast<BinaryOperator>(User)) { - if (all_of(BI->users(), - [](auto *U) { return isa<ShuffleVectorInst>(U); })) { + if (all_of(BI->users(), [](auto *U) { + auto *SVI = dyn_cast<ShuffleVectorInst>(U); + return SVI && isa<UndefValue>(SVI->getOperand(1)); + })) { for (auto *SVI : BI->users()) BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI)); continue; Index: llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll =================================================================== --- llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll +++ llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll @@ -142,3 +142,41 @@ %l8 = fadd fast <4 x float> %l6, %l5 ret <4 x float> %l8 } + +define void @noncanonical(ptr %p0, ptr %p1, ptr %p2) { +; CHECK-LABEL: @noncanonical( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V0:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 8 +; CHECK-NEXT: [[V1:%.*]] = add <8 x i8> [[V0]], +; CHECK-NEXT: [[V2:%.*]] = load <8 x i8>, ptr [[P1:%.*]], align 8 +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <8 x i8> [[V2]], <8 x i8> [[V1]], <4 x i32> +; CHECK-NEXT: store <4 x i8> [[SHUFFLED]], ptr [[P2:%.*]], align 4 +; CHECK-NEXT: ret void +; +entry: + %v0 = load <8 x i8>, ptr %p0 + %v1 = add <8 x i8> %v0, + %v2 = load <8 x i8>, ptr %p1 + %shuffled = shufflevector <8 x i8> %v2, <8 x i8> %v1, <4 x i32> + store <4 x i8> %shuffled, ptr %p2 + ret void +} + +define void @noncanonical_extmask(ptr %p0, ptr %p1, ptr %p2) { +; CHECK-LABEL: @noncanonical_extmask( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V0:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 8 +; CHECK-NEXT: [[V1:%.*]] = add <8 x i8> [[V0]], +; CHECK-NEXT: [[V2:%.*]] = load <8 x i8>, ptr [[P1:%.*]], align 8 +; CHECK-NEXT: [[SHUFFLED:%.*]] = shufflevector <8 x i8> [[V2]], <8 x i8> [[V1]], <8 x i32> +; CHECK-NEXT: 
store <8 x i8> [[SHUFFLED]], ptr [[P2:%.*]], align 8 +; CHECK-NEXT: ret void +; +entry: + %v0 = load <8 x i8>, ptr %p0 + %v1 = add <8 x i8> %v0, + %v2 = load <8 x i8>, ptr %p1 + %shuffled = shufflevector <8 x i8> %v2, <8 x i8> %v1, <8 x i32> + store <8 x i8> %shuffled, ptr %p2 + ret void +}