Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -1183,17 +1184,26 @@
 }
 
 // TODO, Obvious Missing Transforms:
-// * Dereferenceable address -> speculative load/select
 // * Narrow width by halfs excluding zero/undef lanes
 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
+  Value *LoadPtr = II.getArgOperand(0);
+  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
-  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
-    Value *LoadPtr = II.getArgOperand(0);
-    unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
+  if (maskIsAllOneOrUndef(II.getArgOperand(2)))
     return Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
                                      "unmaskedload");
+
+  // If we can unconditionally load from this address, replace with a
+  // load/select idiom. TODO: use DT for context sensitive query
+  if (isDereferenceableAndAlignedPointer(LoadPtr, Alignment,
+                                         II.getModule()->getDataLayout(),
+                                         &II, nullptr)) {
+    Value *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
+                                          "unmaskedload");
+    return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
   return nullptr;
Index: llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll
+++ llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -87,8 +87,9 @@
 ; CHECK-LABEL: @load_speculative(
 ; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
 ; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
-; CHECK-NEXT:    ret <2 x double> [[RES]]
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
                                       double %pt, <2 x i1> %mask) {
   %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
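
The new code path in simplifyMaskedLoad speculates the load when the pointer argument is known to be dereferenceable and sufficiently aligned: the masked.load intrinsic is replaced by an unconditional vector load followed by a select on the mask, with the passthru operand supplying the disabled lanes. A minimal before/after sketch in LLVM IR follows; the function name @speculate_example, its parameter attributes, and the commented "after" form are illustrative assumptions, not text from the commit:

  declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)

  ; Before: each lane of the load is guarded by %mask.
  define <2 x double> @speculate_example(<2 x double>* dereferenceable(16) align 4 %ptr,
                                         <2 x i1> %mask, <2 x double> %passthru) {
    %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4,
                                                             <2 x i1> %mask, <2 x double> %passthru)
    ret <2 x double> %res
  }

  ; After -instcombine (roughly): the whole vector is loaded unconditionally,
  ; which is safe because the pointer is dereferenceable(16) and aligned, and
  ; the mask only selects between the loaded lanes and the passthru lanes.
  ;   %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 4
  ;   %sel = select <2 x i1> %mask, <2 x double> %unmaskedload, <2 x double> %passthru
  ;   ret <2 x double> %sel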