diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp --- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -94,6 +94,11 @@ return true; } +static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, + unsigned Idx) { /* Returns the bit position used for element Idx: Idx itself on little-endian layouts, the mirrored position VectorWidth - 1 - Idx on big-endian ones. NOTE(review): presumably Idx < VectorWidth; the unsigned subtraction wraps otherwise. */ + return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx; +} + // Translate a masked load intrinsic like // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, // <16 x i1> %mask, <16 x i32> %passthru) @@ -126,7 +131,8 @@ // %10 = extractelement <16 x i1> %mask, i32 2 // br i1 %10, label %cond.load4, label %else5 // -static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) { +static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, + bool &ModifiedDT) { Value *Ptr = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -195,7 +201,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -265,7 +272,8 @@ // store i32 %6, i32* %7 // br label %else2 // . . . 
-static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) { +static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, + bool &ModifiedDT) { /* DL is the new leading parameter; the hunk that follows passes it to adjustForEndian when the single-bit test mask for element Idx is built. */ Value *Src = CI->getArgOperand(0); Value *Ptr = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -327,7 +335,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -391,7 +400,8 @@ // . . . // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src // ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) { +static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, + bool &ModifiedDT) { Value *Ptrs = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -446,7 +456,8 @@ Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -515,7 +526,8 @@ // store i32 %Elt1, i32* %Ptr1, align 4 // br label %else2 // . . . 
-static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) { +static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, + bool &ModifiedDT) { /* DL is the new leading parameter; the hunk that follows passes it to adjustForEndian when the single-bit test mask for element Idx is built. */ Value *Src = CI->getArgOperand(0); Value *Ptrs = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -568,7 +580,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -601,7 +614,8 @@ ModifiedDT = true; } -static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) { +static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, + bool &ModifiedDT) { Value *Ptr = CI->getArgOperand(0); Value *Mask = CI->getArgOperand(1); Value *PassThru = CI->getArgOperand(2); @@ -669,7 +683,8 @@ Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -725,7 +740,8 @@ ModifiedDT = true; } -static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) { +static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, + bool &ModifiedDT) { Value *Src = CI->getArgOperand(0); Value *Ptr = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -775,7 +791,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -895,14 
+912,14 @@ CI->getType(), cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue())) return false; - scalarizeMaskedLoad(CI, ModifiedDT); + scalarizeMaskedLoad(DL, CI, ModifiedDT); /* every scalarize* helper in this switch now receives DL as its first argument */ return true; case Intrinsic::masked_store: if (TTI.isLegalMaskedStore( CI->getArgOperand(0)->getType(), cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue())) return false; - scalarizeMaskedStore(CI, ModifiedDT); + scalarizeMaskedStore(DL, CI, ModifiedDT); return true; case Intrinsic::masked_gather: { unsigned AlignmentInt = @@ -912,7 +929,7 @@ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); if (TTI.isLegalMaskedGather(LoadTy, Alignment)) return false; - scalarizeMaskedGather(CI, ModifiedDT); + scalarizeMaskedGather(DL, CI, ModifiedDT); return true; } case Intrinsic::masked_scatter: { @@ -923,18 +940,18 @@ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); if (TTI.isLegalMaskedScatter(StoreTy, Alignment)) return false; - scalarizeMaskedScatter(CI, ModifiedDT); + scalarizeMaskedScatter(DL, CI, ModifiedDT); return true; } case Intrinsic::masked_expandload: if (TTI.isLegalMaskedExpandLoad(CI->getType())) return false; - scalarizeMaskedExpandLoad(CI, ModifiedDT); + scalarizeMaskedExpandLoad(DL, CI, ModifiedDT); return true; case Intrinsic::masked_compressstore: if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) return false; - scalarizeMaskedCompressStore(CI, ModifiedDT); + scalarizeMaskedCompressStore(DL, CI, ModifiedDT); return true; } }