Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1344,9 +1344,24 @@ if (!isMinMaxWithLoads(LoadAddr)) return false; + if (!all_of(LI->users(), [LI](User *U) { + auto *SI = dyn_cast<StoreInst>(U); + return SI && SI->getPointerOperand() != LI && + !SI->getPointerOperand()->isSwiftError(); + })) + return false; + + IC.Builder.SetInsertPoint(LI); LoadInst *NewLI = combineLoadToNewType( IC, *LI, LoadAddr->getType()->getPointerElementType()); - combineStoreToNewValue(IC, SI, NewLI); + // Replace all the stores with stores of the newly loaded value. + for (auto *UI : LI->users()) { + auto *SI = cast<StoreInst>(UI); + IC.Builder.SetInsertPoint(SI); + combineStoreToNewValue(IC, *SI, NewLI); + IC.eraseInstFromFunction(*SI); + } + IC.eraseInstFromFunction(*LI); return true; } @@ -1375,7 +1390,7 @@ return eraseInstFromFunction(SI); if (removeBitcastsFromLoadStoreOnMinMax(*this, SI)) - return eraseInstFromFunction(SI); + return nullptr; // Replace GEP indices if possible. 
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) { Index: test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/multiple-uses-load-bitcast-select.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S -data-layout="E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" | FileCheck %s + +%0 = type { double } + +define void @_ZN3pov13Compute_PrismEPNS_12Prism_StructEPA2_d(%0* nocapture) local_unnamed_addr { +; CHECK-LABEL: @_ZN3pov13Compute_PrismEPNS_12Prism_StructEPA2_d( +; CHECK-NEXT: [[DOTSROA_03:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[DOTSROA_3:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[DOTSROA_0:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* inttoptr (i64 120 to i32*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 1 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP10:%.*]] +; CHECK: [[TMP5:%.*]] = bitcast i64* [[DOTSROA_0]] to double* +; CHECK-NEXT: [[DOTSROA_0_0__SROA_0_0_1:%.*]] = load double, double* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load double, double* inttoptr (i64 16 to double*), align 16 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt double [[DOTSROA_0_0__SROA_0_0_1]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64* inttoptr (i64 16 to i64*), i64* [[DOTSROA_0]] +; CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 +; CHECK-NEXT: store i64 [[TMP9]], i64* [[DOTSROA_0]], align 8 +; CHECK-NEXT: unreachable +; CHECK: [[DOTSROA_03_0__SROA_03_8_:%.*]] = load double, double* [[DOTSROA_03]], align 8 +; CHECK-NEXT: [[DOTSROA_3_0__SROA_3_16_:%.*]] = load double, double* [[DOTSROA_3]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = fcmp olt double [[DOTSROA_03_0__SROA_03_8_]], [[DOTSROA_3_0__SROA_3_16_]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], double 
[[DOTSROA_3_0__SROA_3_16_]], double [[DOTSROA_03_0__SROA_03_8_]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[DOTSROA_0]] to double* +; CHECK-NEXT: store double [[TMP12]], double* [[TMP13]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[TMP0]], i64 0, i32 0 +; CHECK-NEXT: store double [[TMP12]], double* [[TMP14]], align 8 +; CHECK-NEXT: ret void +; + %.sroa.03 = alloca double + %.sroa.3 = alloca double + %.sroa.0 = alloca i64 + %2 = load i32, i32* inttoptr (i64 120 to i32*), align 8 + %3 = icmp sgt i32 %2, 1 + br i1 %3, label %4, label %10 + +;