diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -769,6 +769,10 @@ // of the loads is to form a wider load. static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA) { + // Only consider load chains of scalar values. + if (isa(I.getType())) + return false; + LoadOps LOps; if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot) return false; diff --git a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s + +; Use the same idiom as X86/or-load.ll @loadCombine_2consecutive to represent a +; sequence of operations that can be replaced by a single double-width load when +; using scalar types but whose logic does not apply to fixed length vectors. 
+define <8 x i16> @or-load-fixed-length-vector(ptr %p1) { +; CHECK-LABEL: @or-load-fixed-length-vector( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1:%.*]], i32 1 +; CHECK-NEXT: [[L1:%.*]] = load <8 x i8>, ptr [[P1]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load <8 x i8>, ptr [[P2]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext <8 x i8> [[L1]] to <8 x i16> +; CHECK-NEXT: [[E2:%.*]] = zext <8 x i8> [[L2]] to <8 x i16> +; CHECK-NEXT: [[S2:%.*]] = shl <8 x i16> [[E2]], +; CHECK-NEXT: [[OR:%.*]] = or <8 x i16> [[E1]], [[S2]] +; CHECK-NEXT: ret <8 x i16> [[OR]] +; + %p2 = getelementptr i8, ptr %p1, i32 1 + %l1 = load <8 x i8>, ptr %p1, align 1 + %l2 = load <8 x i8>, ptr %p2, align 1 + %e1 = zext <8 x i8> %l1 to <8 x i16> + %e2 = zext <8 x i8> %l2 to <8 x i16> + %s2 = shl <8 x i16> %e2, + %or = or <8 x i16> %e1, %s2 + ret <8 x i16> %or +} + +; Use the same idiom as X86/or-load.ll @loadCombine_2consecutive to represent a +; sequence of operations that can be replaced by a single double-width load when +; using scalar types but whose logic does not apply to scalable length vectors. +define @or-load-scalable-vector(ptr %p1) { +; CHECK-LABEL: @or-load-scalable-vector( +; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1:%.*]], i32 1 +; CHECK-NEXT: [[L1:%.*]] = load , ptr [[P1]], align 1 +; CHECK-NEXT: [[L2:%.*]] = load , ptr [[P2]], align 1 +; CHECK-NEXT: [[E1:%.*]] = zext [[L1]] to +; CHECK-NEXT: [[E2:%.*]] = zext [[L2]] to +; CHECK-NEXT: [[S2:%.*]] = shl [[E2]], shufflevector ( insertelement ( poison, i16 8, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[OR:%.*]] = or [[E1]], [[S2]] +; CHECK-NEXT: ret [[OR]] +; + %p2 = getelementptr i8, ptr %p1, i32 1 + %l1 = load , ptr %p1, align 1 + %l2 = load , ptr %p2, align 1 + %e1 = zext %l1 to + %e2 = zext %l2 to + %s2 = shl %e2, shufflevector ( insertelement ( poison, i16 8, i32 0), poison, zeroinitializer) + %or = or %e1, %s2 + ret %or +}