diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -123,10 +123,11 @@ /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them /// to binop(shuffle(x), shuffle(y)) to allow the formation of an /// interleaving load. Any newly created shuffles that operate on \p LI will - /// be added to \p Shuffles. - bool tryReplaceBinOpShuffles(ArrayRef BinOpShuffles, - SmallVectorImpl &Shuffles, - LoadInst *LI); + /// be added to \p Shuffles. Returns true, if any changes to the IR have been + /// made. + bool replaceBinOpShuffles(ArrayRef BinOpShuffles, + SmallVectorImpl &Shuffles, + LoadInst *LI); }; } // end anonymous namespace. @@ -369,14 +370,17 @@ // use the shufflevector instructions instead of the load. if (!tryReplaceExtracts(Extracts, Shuffles)) return false; - if (!tryReplaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI)) - return false; + + bool BinOpShuffleChanged = + replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI); LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); // Try to create target specific intrinsics to replace the load and shuffles. - if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) - return false; + if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) { + // If Extracts is not empty, tryReplaceExtracts made changes earlier. + return !Extracts.empty() || BinOpShuffleChanged; + } for (auto SVI : Shuffles) DeadInsts.push_back(SVI); @@ -385,7 +389,7 @@ return true; } -bool InterleavedAccess::tryReplaceBinOpShuffles( +bool InterleavedAccess::replaceBinOpShuffles( ArrayRef BinOpShuffles, SmallVectorImpl &Shuffles, LoadInst *LI) { for (auto *SVI : BinOpShuffles) { @@ -410,7 +414,8 @@ if (NewSVI2->getOperand(0) == LI) Shuffles.push_back(NewSVI2); } - return true; + + return !BinOpShuffles.empty(); } bool InterleavedAccess::tryReplaceExtracts( diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -interleaved-access -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.15.0" + +; No interleaved load instruction is generated, but the shuffle is moved just +; after the load. +define <2 x double> @shuffle_binop_fol(<4 x double>* %ptr) { +; CHECK-LABEL: @shuffle_binop_fol( +; CHECK-NEXT: vector.body.preheader: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8 +; CHECK-NEXT: [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> +; CHECK-NEXT: [[EXTRACTED2:%.*]] = shufflevector <4 x double> , <4 x double> undef, <2 x i32> +; CHECK-NEXT: [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]] +; CHECK-NEXT: ret <2 x double> [[FADD3]] +; +vector.body.preheader: + %wide.load = load <4 x double>, <4 x double>* %ptr, align 8 + %fadd = fadd <4 x double> %wide.load, + %extracted = shufflevector <4 x double> %fadd, <4 x double> undef, <2 x i32> + ret <2 x double> %extracted +} + +; No interleaved load instruction is generated, but the extractelement +; instructions are updated to use the shuffle instead of the load. +define void @shuffle_extract(<4 x double>* %ptr, i1 %c) { +; CHECK-LABEL: @shuffle_extract( +; CHECK-NEXT: vector.body.preheader: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8 +; CHECK-NEXT: [[EXTRACTED:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_MERGE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[EXTRACTED]], i64 0 +; CHECK-NEXT: call void @use(double [[TMP0]]) +; CHECK-NEXT: br label [[IF_MERGE]] +; CHECK: if.merge: +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[EXTRACTED]], i64 1 +; CHECK-NEXT: call void @use(double [[TMP1]]) +; CHECK-NEXT: ret void +; +vector.body.preheader: + %wide.load = load <4 x double>, <4 x double>* %ptr, align 8 + %extracted = shufflevector <4 x double> %wide.load, <4 x double> undef, <2 x i32> + br i1 %c, label %if.then, label %if.merge + +if.then: + %e0 = extractelement <4 x double> %wide.load, i32 0 + call void @use(double %e0) + br label %if.merge + +if.merge: + %e1 = extractelement <4 x double> %wide.load, i32 2 + call void @use(double %e1) + ret void +} + +declare void @use(double)