Index: llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ llvm/trunk/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -616,6 +616,24 @@
   // The result vector
   Value *VResult = PassThru;
 
+  // Shorten the way if the mask is a vector of constants.
+  if (isConstantIntVector(Mask)) {
+    unsigned MemIndex = 0;
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+        continue;
+      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+      LoadInst *Load =
+          Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
+      VResult =
+          Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+      ++MemIndex;
+    }
+    CI->replaceAllUsesWith(VResult);
+    CI->eraseFromParent();
+    return;
+  }
+
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
     //
@@ -694,6 +712,22 @@
 
   unsigned VectorWidth = VecType->getNumElements();
 
+  // Shorten the way if the mask is a vector of constants.
+  if (isConstantIntVector(Mask)) {
+    unsigned MemIndex = 0;
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+        continue;
+      Value *OneElt =
+          Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+      Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+      Builder.CreateAlignedStore(OneElt, NewPtr, 1);
+      ++MemIndex;
+    }
+    CI->eraseFromParent();
+    return;
+  }
+
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
     //
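Both fast paths guard on isConstantIntVector(), a file-local helper that predates this patch and is therefore not part of the diff. A minimal sketch of the predicate, assuming the typed-pointer LLVM API of this vintage (Type::getVectorNumElements, Constant::getAggregateElement); the actual definition lives earlier in ScalarizeMaskedMemIntrin.cpp:

// Sketch only -- mirrors the pre-existing helper; every mask lane must be
// a ConstantInt (i1 true/false) for the constant-mask shortcut to fire.
#include "llvm/IR/Constants.h"
using namespace llvm;

static bool isConstantIntVector(Value *Mask) {
  Constant *C = dyn_cast<Constant>(Mask);
  if (!C)
    return false;
  unsigned NumElts = Mask->getType()->getVectorNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *CElt = C->getAggregateElement(i);
    if (!CElt || !isa<ConstantInt>(CElt))
      return false;
  }
  return true;
}

With that established, cast<Constant>(Mask)->getAggregateElement(Idx) in the loops above is guaranteed to yield a ConstantInt, so isNullValue() cleanly distinguishes disabled lanes.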
Index: llvm/trunk/test/CodeGen/X86/masked_compressstore.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/masked_compressstore.ll
+++ llvm/trunk/test/CodeGen/X86/masked_compressstore.ll
@@ -1123,7 +1123,7 @@
 
 define void @compressstore_v16f32_const(float* %base, <16 x float> %V) {
 ; SSE2-LABEL: compressstore_v16f32_const:
-; SSE2: ## %bb.0: ## %cond.store
+; SSE2: ## %bb.0:
 ; SSE2-NEXT: movss %xmm0, (%rdi)
 ; SSE2-NEXT: movaps %xmm0, %xmm4
 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
@@ -1160,7 +1160,7 @@
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: compressstore_v16f32_const:
-; SSE42: ## %bb.0: ## %cond.store
+; SSE42: ## %bb.0:
 ; SSE42-NEXT: movups %xmm0, (%rdi)
 ; SSE42-NEXT: movups %xmm1, 16(%rdi)
 ; SSE42-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
@@ -1171,7 +1171,7 @@
 ; SSE42-NEXT: retq
 ;
 ; AVX1-LABEL: compressstore_v16f32_const:
-; AVX1: ## %bb.0: ## %cond.store
+; AVX1: ## %bb.0:
 ; AVX1-NEXT: vmovups %ymm0, (%rdi)
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
@@ -1183,7 +1183,7 @@
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: compressstore_v16f32_const:
-; AVX2: ## %bb.0: ## %cond.store
+; AVX2: ## %bb.0:
 ; AVX2-NEXT: vmovups %ymm0, (%rdi)
 ; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [0,1,2,4]
 ; AVX2-NEXT: vpermps %ymm1, %ymm0, %ymm0
Index: llvm/trunk/test/CodeGen/X86/masked_expandload.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/masked_expandload.ll
+++ llvm/trunk/test/CodeGen/X86/masked_expandload.ll
@@ -1368,7 +1368,7 @@
 
 define <4 x float> @expandload_v4f32_const(float* %base, <4 x float> %src0) {
 ; SSE2-LABEL: expandload_v4f32_const:
-; SSE2: ## %bb.0: ## %cond.load
+; SSE2: ## %bb.0:
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[0,0]
@@ -1379,7 +1379,7 @@
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: expandload_v4f32_const:
-; SSE42: ## %bb.0: ## %cond.load
+; SSE42: ## %bb.0:
 ; SSE42-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; SSE42-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
@@ -1387,7 +1387,7 @@
 ; SSE42-NEXT: retq
 ;
 ; AVX1OR2-LABEL: expandload_v4f32_const:
-; AVX1OR2: ## %bb.0: ## %cond.load
+; AVX1OR2: ## %bb.0:
 ; AVX1OR2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
@@ -1423,7 +1423,7 @@
 
 define <16 x float> @expandload_v16f32_const(float* %base, <16 x float> %src0) {
 ; SSE2-LABEL: expandload_v16f32_const:
-; SSE2: ## %bb.0: ## %cond.load
+; SSE2: ## %bb.0:
 ; SSE2-NEXT: movups (%rdi), %xmm0
 ; SSE2-NEXT: movups 16(%rdi), %xmm1
 ; SSE2-NEXT: movss {{.*#+}} xmm5 = mem[0],zero,zero,zero
@@ -1443,7 +1443,7 @@
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: expandload_v16f32_const:
-; SSE42: ## %bb.0: ## %cond.load
+; SSE42: ## %bb.0:
 ; SSE42-NEXT: movups (%rdi), %xmm0
 ; SSE42-NEXT: movups 16(%rdi), %xmm1
 ; SSE42-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
@@ -1457,7 +1457,7 @@
 ; SSE42-NEXT: retq
 ;
 ; AVX1OR2-LABEL: expandload_v16f32_const:
-; AVX1OR2: ## %bb.0: ## %cond.load
+; AVX1OR2: ## %bb.0:
 ; AVX1OR2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX1OR2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
@@ -1507,7 +1507,7 @@
 
 define <16 x float> @expandload_v16f32_const_undef(float* %base) {
 ; SSE2-LABEL: expandload_v16f32_const_undef:
-; SSE2: ## %bb.0: ## %cond.load
+; SSE2: ## %bb.0:
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
 ; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
@@ -1517,7 +1517,7 @@
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: expandload_v16f32_const_undef:
-; SSE42: ## %bb.0: ## %cond.load
+; SSE42: ## %bb.0:
 ; SSE42-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
 ; SSE42-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
 ; SSE42-NEXT: movups (%rdi), %xmm0
@@ -1526,7 +1526,7 @@
 ; SSE42-NEXT: retq
 ;
 ; AVX1OR2-LABEL: expandload_v16f32_const_undef:
-; AVX1OR2: ## %bb.0: ## %cond.load
+; AVX1OR2: ## %bb.0:
 ; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX1OR2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; AVX1OR2-NEXT: vinsertf128 $1, 44(%rdi), %ymm0, %ymm1
@@ -2991,18 +2991,18 @@
 
 define <2 x i64> @expandload_v2i64_const(i64* %base, <2 x i64> %src0) {
 ; SSE2-LABEL: expandload_v2i64_const:
-; SSE2: ## %bb.0: ## %else
+; SSE2: ## %bb.0:
 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSE42-LABEL: expandload_v2i64_const:
-; SSE42: ## %bb.0: ## %else
+; SSE42: ## %bb.0:
 ; SSE42-NEXT: pinsrq $1, (%rdi), %xmm0
 ; SSE42-NEXT: retq
 ;
 ; AVX1OR2-LABEL: expandload_v2i64_const:
-; AVX1OR2: ## %bb.0: ## %else
+; AVX1OR2: ## %bb.0:
 ; AVX1OR2-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
 ; AVX1OR2-NEXT: retq
 ;
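The assembly updates above are the downstream effect: once the scalarizer emits straight-line loads and stores for a constant mask, ISel never sees cond.load/cond.store blocks, so the "## %cond.*" and "## %else" basic-block annotations disappear from the first CHECK line of each function. As a standalone illustration of the semantics being verified, here is the <2 x i64>, mask <i1 false, i1 true> case in plain C++ (hypothetical function names, illustration only):

#include <cstdint>

// expandload: MemIndex advances only on enabled lanes, so enabled lanes
// consume consecutive memory; disabled lanes keep the pass-through value.
void expandload_v2i64_const(const int64_t *base, const int64_t passthru[2],
                            int64_t out[2]) {
  const bool mask[2] = {false, true}; // <2 x i1> <i1 false, i1 true>
  unsigned MemIndex = 0;
  for (unsigned Idx = 0; Idx < 2; ++Idx)
    out[Idx] = mask[Idx] ? base[MemIndex++] : passthru[Idx];
}

// compressstore: enabled lanes pack into consecutive memory slots.
void compressstore_v2i64_const(int64_t *base, const int64_t data[2]) {
  const bool mask[2] = {false, true};
  unsigned MemIndex = 0;
  for (unsigned Idx = 0; Idx < 2; ++Idx)
    if (mask[Idx])
      base[MemIndex++] = data[Idx];
}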
Index: llvm/trunk/test/CodeGen/X86/pr39666.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr39666.ll
+++ llvm/trunk/test/CodeGen/X86/pr39666.ll
@@ -3,7 +3,7 @@
 
 define <2 x i64> @test5(i64* %base, <2 x i64> %src0) {
 ; CHECK-LABEL: test5:
-; CHECK: # %bb.0: # %else
+; CHECK: # %bb.0:
 ; CHECK-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %res = call <2 x i64> @llvm.masked.expandload.v2i64(i64* %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0)
Index: llvm/trunk/test/CodeGen/X86/pr40994.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr40994.ll
+++ llvm/trunk/test/CodeGen/X86/pr40994.ll
@@ -3,7 +3,7 @@
 
 define <8 x i8> @foo(<16 x i8> %a) {
 ; CHECK-LABEL: foo:
-; CHECK: # %bb.0: # %cond.store
+; CHECK: # %bb.0:
 ; CHECK-NEXT: pextrb $0, %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: pextrb $2, %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: pextrb $4, %xmm0, -{{[0-9]+}}(%rsp)
Index: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll
===================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-compressstore.ll
@@ -27,20 +27,12 @@
 
 define void @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %data) {
 ; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT: br i1 true, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.store:
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT: store i64 [[TMP1]], i64* [[P:%.*]], align 1
+; CHECK-NEXT: [[ELT0:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: store i64 [[ELT0]], i64* [[TMP1]], align 1
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT: br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.store1:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT: store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
+; CHECK-NEXT: store i64 [[ELT1]], i64* [[TMP2]], align 1
 ; CHECK-NEXT: ret void
 ;
   call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 true, i1 true>)
@@ -49,20 +41,6 @@
 
 define void @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %data) {
 ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.store:
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT: store i64 [[TMP1]], i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT: br i1 false, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.store1:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT: store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
 ; CHECK-NEXT: ret void
 ;
   call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 false>)
@@ -71,20 +49,9 @@
 
 define void @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %data) {
 ; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT: br i1 false, label [[COND_STORE:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.store:
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
-; CHECK-NEXT: store i64 [[TMP1]], i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP2]], [[COND_STORE]] ], [ [[P]], [[TMP0:%.*]] ]
-; CHECK-NEXT: br i1 true, label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.store1:
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
-; CHECK-NEXT: store i64 [[TMP3]], i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
+; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: store i64 [[ELT1]], i64* [[TMP1]], align 1
 ; CHECK-NEXT: ret void
 ;
   call void @llvm.masked.compressstore.v2i64.p0v2i64(<2 x i64> %data, i64* %p, <2 x i1> <i1 false, i1 true>)
Index: llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll
===================================================================
--- llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll
+++ llvm/trunk/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-expandload.ll
@@ -29,23 +29,13 @@
 
 define <2 x i64> @scalarize_v2i64_ones_mask(i64* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_ones_mask(
-; CHECK-NEXT: br i1 true, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.load:
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT: br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.load1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT: [[RES0:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD0]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
+; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[TMP2]], align 1
+; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[RES0]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
 ;
   %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
   ret <2 x i64> %ret
@@ -53,23 +43,7 @@
 
 define <2 x i64> @scalarize_v2i64_zero_mask(i64* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_zero_mask(
-; CHECK-NEXT: br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.load:
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT: br i1 false, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.load1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
 ;
   %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
   ret <2 x i64> %ret
@@ -77,23 +51,10 @@
 
 define <2 x i64> @scalarize_v2i64_const_mask(i64* %p, <2 x i64> %passthru) {
 ; CHECK-LABEL: @scalarize_v2i64_const_mask(
-; CHECK-NEXT: br i1 false, label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
-; CHECK: cond.load:
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[P:%.*]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i32 1
-; CHECK-NEXT: br label [[ELSE]]
-; CHECK: else:
-; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP2]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
-; CHECK-NEXT: [[PTR_PHI_ELSE:%.*]] = phi i64* [ [[TMP3]], [[COND_LOAD]] ], [ [[P]], [[TMP0]] ]
-; CHECK-NEXT: br i1 true, label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
-; CHECK: cond.load1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[PTR_PHI_ELSE]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP4]], i64 1
-; CHECK-NEXT: br label [[ELSE2]]
-; CHECK: else2:
-; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
-; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i32 0
+; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[TMP1]], align 1
+; CHECK-NEXT: [[RES1:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[LOAD1]], i64 1
+; CHECK-NEXT: ret <2 x i64> [[RES1]]
 ;
   %ret = call <2 x i64> @llvm.masked.expandload.v2i64.p0v2i64(i64* %p, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
   ret <2 x i64> %ret
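For contrast, a non-constant mask still takes the pre-existing branchy path, which is what the deleted CHECK lines were matching: one br i1/cond.load/else chain per lane, with phis carrying the partial result and the incremented pointer. The conceptual shape in plain C++ (hypothetical name, illustration only):

// With a runtime mask, each lane needs a guard; the scalarizer emits this
// as a chain of cond.load/else blocks rather than a loop.
void expandload_v2i64_var(const int64_t *base, const bool mask[2],
                          const int64_t passthru[2], int64_t out[2]) {
  const int64_t *ptr = base;
  for (unsigned Idx = 0; Idx < 2; ++Idx) {
    out[Idx] = passthru[Idx]; // the phi's pass-through incoming value
    if (mask[Idx]) {          // becomes 'br i1 ..., label %cond.load, ...'
      out[Idx] = *ptr;        // the cond.load block
      ++ptr;                  // pointer phi feeding the next lane
    }
  }
}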