Index: llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp =================================================================== --- llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp +++ llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp @@ -105,6 +105,11 @@ return true; } +static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, + unsigned Idx) { + return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx; +} + // Translate a masked load intrinsic like // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, // <16 x i1> %mask, <16 x i32> %passthru) @@ -137,8 +142,8 @@ // %10 = extractelement <16 x i1> %mask, i32 2 // br i1 %10, label %cond.load4, label %else5 // -static void scalarizeMaskedLoad(CallInst *CI, DomTreeUpdater *DTU, - bool &ModifiedDT) { +static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, + DomTreeUpdater *DTU, bool &ModifiedDT) { Value *Ptr = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -207,7 +212,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -278,8 +284,8 @@ // store i32 %6, i32* %7 // br label %else2 // . . . -static void scalarizeMaskedStore(CallInst *CI, DomTreeUpdater *DTU, - bool &ModifiedDT) { +static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, + DomTreeUpdater *DTU, bool &ModifiedDT) { Value *Src = CI->getArgOperand(0); Value *Ptr = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -340,7 +346,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -405,8 +412,8 @@ // . . . // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src // ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI, DomTreeUpdater *DTU, - bool &ModifiedDT) { +static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, + DomTreeUpdater *DTU, bool &ModifiedDT) { Value *Ptrs = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -461,7 +468,8 @@ Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -534,8 +542,8 @@ // store i32 %Elt1, i32* %Ptr1, align 4 // br label %else2 // . . . -static void scalarizeMaskedScatter(CallInst *CI, DomTreeUpdater *DTU, - bool &ModifiedDT) { +static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, + DomTreeUpdater *DTU, bool &ModifiedDT) { Value *Src = CI->getArgOperand(0); Value *Ptrs = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -587,7 +595,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -623,8 +632,8 @@ ModifiedDT = true; } -static void scalarizeMaskedExpandLoad(CallInst *CI, DomTreeUpdater *DTU, - bool &ModifiedDT) { +static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, + DomTreeUpdater *DTU, bool &ModifiedDT) { Value *Ptr = CI->getArgOperand(0); Value *Mask = CI->getArgOperand(1); Value *PassThru = CI->getArgOperand(2); @@ -692,7 +701,8 @@ Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -749,7 +759,8 @@ ModifiedDT = true; } -static void scalarizeMaskedCompressStore(CallInst *CI, DomTreeUpdater *DTU, +static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, + DomTreeUpdater *DTU, bool &ModifiedDT) { Value *Src = CI->getArgOperand(0); Value *Ptr = CI->getArgOperand(1); @@ -800,7 +811,8 @@ // Value *Predicate; if (VectorWidth != 1) { - Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx)); + Value *Mask = Builder.getInt(APInt::getOneBitSet( + VectorWidth, adjustForEndian(DL, VectorWidth, Idx))); Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask), Builder.getIntN(VectorWidth, 0)); } else { @@ -934,14 +946,14 @@ CI->getType(), cast(CI->getArgOperand(1))->getAlignValue())) return false; - scalarizeMaskedLoad(CI, DTU, ModifiedDT); + scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT); return true; case Intrinsic::masked_store: if (TTI.isLegalMaskedStore( CI->getArgOperand(0)->getType(), cast(CI->getArgOperand(2))->getAlignValue())) return false; - scalarizeMaskedStore(CI, DTU, ModifiedDT); + scalarizeMaskedStore(DL, CI, DTU, ModifiedDT); return true; case Intrinsic::masked_gather: { unsigned AlignmentInt = @@ -951,7 +963,7 @@ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), LoadTy); if (TTI.isLegalMaskedGather(LoadTy, Alignment)) return false; - scalarizeMaskedGather(CI, DTU, ModifiedDT); + scalarizeMaskedGather(DL, CI, DTU, ModifiedDT); return true; } case Intrinsic::masked_scatter: { @@ -962,18 +974,18 @@ DL.getValueOrABITypeAlignment(MaybeAlign(AlignmentInt), StoreTy); if (TTI.isLegalMaskedScatter(StoreTy, Alignment)) return false; - scalarizeMaskedScatter(CI, DTU, ModifiedDT); + scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT); return true; } case Intrinsic::masked_expandload: if (TTI.isLegalMaskedExpandLoad(CI->getType())) return false; - scalarizeMaskedExpandLoad(CI, DTU, ModifiedDT); + scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT); return true; case Intrinsic::masked_compressstore: if (TTI.isLegalMaskedCompressStore(CI->getArgOperand(0)->getType())) return false; - scalarizeMaskedCompressStore(CI, DTU, ModifiedDT); + scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT); return true; } }