Index: llvm/trunk/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ConstantFolding.cpp
+++ llvm/trunk/lib/Analysis/ConstantFolding.cpp
@@ -1299,6 +1299,7 @@
   case Intrinsic::fmuladd:
   case Intrinsic::copysign:
   case Intrinsic::round:
+  case Intrinsic::masked_load:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
@@ -1843,11 +1844,44 @@
 Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
                                  VectorType *VTy, ArrayRef<Constant *> Operands,
+                                 const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
   SmallVector<Constant *, 4> Result(VTy->getNumElements());
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = VTy->getElementType();
 
+  if (IntrinsicID == Intrinsic::masked_load) {
+    auto *SrcPtr = Operands[0];
+    auto *Mask = Operands[2];
+    auto *Passthru = Operands[3];
+    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, VTy, DL);
+    if (!VecData)
+      return nullptr;
+
+    SmallVector<Constant *, 32> NewElements;
+    for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
+      auto *MaskElt =
+          dyn_cast_or_null<ConstantInt>(Mask->getAggregateElement(I));
+      if (!MaskElt)
+        break;
+      if (MaskElt->isZero()) {
+        auto *PassthruElt = Passthru->getAggregateElement(I);
+        if (!PassthruElt)
+          break;
+        NewElements.push_back(PassthruElt);
+      } else {
+        assert(MaskElt->isOne());
+        auto *VecElt = VecData->getAggregateElement(I);
+        if (!VecElt)
+          break;
+        NewElements.push_back(VecElt);
+      }
+    }
+    if (NewElements.size() == VTy->getNumElements())
+      return ConstantVector::get(NewElements);
+    return nullptr;
+  }
+
   for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
     for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
@@ -1880,7 +1914,8 @@
   Type *Ty = F->getReturnType();
 
   if (auto *VTy = dyn_cast<VectorType>(Ty))
-    return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, TLI);
+    return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands,
+                                  F->getParent()->getDataLayout(), TLI);
 
   return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
 }
Index: llvm/trunk/test/Transforms/InstSimplify/call.ll
===================================================================
--- llvm/trunk/test/Transforms/InstSimplify/call.ll
+++ llvm/trunk/test/Transforms/InstSimplify/call.ll
@@ -204,4 +204,15 @@
 ; CHECK-LABEL: define i32 @call_undef(
 ; CHECK: ret i32 undef
 
+@GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49]
+
+define <8 x i32> @partial_masked_load() {
+; CHECK-LABEL: @partial_masked_load(
+; CHECK: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+  %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -2) to <8 x i32>*), i32 4, <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  ret <8 x i32> %masked.load
+}
+
 declare noalias i8* @malloc(i64)
+
+declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
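
For reference, a minimal sketch of the new fold in its simplest form (hypothetical IR, not
part of the patch; @CV and @fold_all_lanes are illustrative names): when the source pointer
is a global with a constant initializer and every mask lane is a known i1 constant, the call
folds to a plain constant vector, with false lanes taken from the passthru operand.

  @CV = private constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

  define <4 x i32> @fold_all_lanes() {
    ; With an all-true mask, this should fold to ret <4 x i32> <i32 1, i32 2, i32 3, i32 4>.
    %v = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* bitcast ([4 x i32]* @CV to <4 x i32>*), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer)
    ret <4 x i32> %v
  }

  declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)

The committed test above exercises the more interesting partial case: the vector load starts
two elements before @GV, so lanes 0 and 1 would read out of bounds, but their mask bits are
false and the fold substitutes the (undef) passthru elements for those lanes instead.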