diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -495,12 +495,15 @@
       if (!AreEquivalentAddressValues(StorePtr, Ptr))
         return nullptr;
 
+      if (IsLoadCSE)
+        *IsLoadCSE = false;
+
       Value *Val = SI->getValueOperand();
-      if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL)) {
-        if (IsLoadCSE)
-          *IsLoadCSE = false;
+      if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL))
         return Val;
-      }
+
+      if (auto *C = dyn_cast<Constant>(Val))
+        return ConstantFoldLoadThroughBitcast(C, AccessTy, DL);
     }
 
   return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll
--- a/llvm/test/Transforms/InstCombine/load-store-forward.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll
@@ -22,8 +22,7 @@
 ; CHECK-LABEL: @vec_store_load_first(
 ; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>*
 ; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, <2 x i32>* [[P2]], align 8
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[P]], align 4
-; CHECK-NEXT:    ret i32 [[LOAD]]
+; CHECK-NEXT:    ret i32 1
 ;
   %p2 = bitcast i32* %p to <2 x i32>*
   store <2 x i32> <i32 1, i32 2>, <2 x i32>* %p2
@@ -31,6 +30,19 @@
   ret i32 %load
 }
 
+define i32 @vec_store_load_first_constexpr(i32* %p) {
+; CHECK-LABEL: @vec_store_load_first_constexpr(
+; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> bitcast (i64 ptrtoint (i32 (i32*)* @vec_store_load_first to i64) to <2 x i32>), <2 x i32>* [[P2]], align 8
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %p2 = bitcast i32* %p to <2 x i32>*
+  store <2 x i32> bitcast (i64 ptrtoint (i32 (i32*)* @vec_store_load_first to i64) to <2 x i32>), <2 x i32>* %p2, align 8
+  %load = load i32, i32* %p, align 4
+  ret i32 %load
+}
+
 define i32 @vec_store_load_second(i32* %p) {
 ; CHECK-LABEL: @vec_store_load_second(
 ; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to <2 x i32>*