diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -101,7 +101,7 @@ assert(LoadPtr->getType()->getPointerAddressSpace() == StorePtr->getType()->getPointerAddressSpace() && - LoadType == getLoadStoreType(Store) && + CastInst::isBitCastable(getLoadStoreType(Store), LoadType) && "Should be a known dependence"); // Currently we only support accesses with unit stride. FIXME: we should be @@ -211,9 +211,10 @@ if (!Load) continue; - // Only progagate the value if they are of the same type. - if (Store->getPointerOperandType() != Load->getPointerOperandType() || - getLoadStoreType(Store) != getLoadStoreType(Load)) + // Only propagate the value if the stored value can be bitcast to the + // loaded type (same total size, and no pointer/integer conversion). + if (!CastInst::isBitCastable(getLoadStoreType(Store), + getLoadStoreType(Load))) continue; Candidates.emplace_front(Load, Store); @@ -438,7 +439,21 @@ PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded", &L->getHeader()->front()); PHI->addIncoming(Initial, PH); - PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch()); + + Type *LoadType = Initial->getType(); + Value *StoreValue = Cand.Store->getValueOperand(); + + assert(CastInst::isBitCastable(StoreValue->getType(), LoadType) && + "The stored value should be bitcastable to the loaded type!"); + + // The candidate filter only requires the types to be bitcastable, so + // convert the stored value to the loaded type when they differ. + if (StoreValue->getType() != LoadType) + StoreValue = + CastInst::Create(Instruction::BitCast, StoreValue, LoadType, + "store_forward_cast", Cand.Store); + + PHI->addIncoming(StoreValue, L->getLoopLatch()); Cand.Load->replaceAllUsesWith(PHI); } diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll --- 
a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll @@ -1,6 +1,7 @@ ; RUN: opt --opaque-pointers -loop-load-elim -S < %s | FileCheck %s -; Don't crash if the store and the load use different types. +; If the store and the load use different types, but have the same +; size then we should still be able to forward the value. ; ; for (unsigned i = 0; i < 100; i++) { ; A[i+1] = B[i] + 2; @@ -30,7 +31,7 @@ ; CHECK: %a = load float, ptr %Aidx, align 4 %a = load float, ptr %Aidx, align 4 -; CHECK-NEXT: %c = fmul float %a, 2.0 +; CHECK-NEXT: %c = fmul float %store_forwarded, 2.0 %c = fmul float %a, 2.0 %c.int = fptosi float %c to i32 store i32 %c.int, ptr %Cidx, align 4 @@ -42,7 +43,8 @@ ret void } -; Don't crash if the store and the load use different types. +; If the store and the load use different types, but have the same +; size then we should still be able to forward the value. ; ; for (unsigned i = 0; i < 100; i++) { ; A[i+1] = B[i] + 2; @@ -74,7 +76,7 @@ ; CHECK: %a = load float, ptr %Aidx, align 4 %a = load float, ptr %Aidx, align 4 -; CHECK-NEXT: %c = fmul float %a, 2.0 +; CHECK-NEXT: %c = fmul float %store_forwarded, 2.0 %c = fmul float %a, 2.0 %c.int = fptosi float %c to i32 store i32 %c.int, ptr %Cidx, align 4 diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll --- a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll +++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll @@ -1,6 +1,7 @@ ; RUN: opt -loop-load-elim -S < %s | FileCheck %s -; Don't crash if the store and the load use different types. +; If the store and the load use different types, but have the same +; size then we should still be able to forward the value. 
; ; for (unsigned i = 0; i < 100; i++) { ; A[i+1] = B[i] + 2; @@ -27,11 +28,12 @@ %b = load i32, i32* %Bidx, align 4 %a_p1 = add i32 %b, 2 +; CHECK: %store_forward_cast = bitcast i32 %a_p1 to float store i32 %a_p1, i32* %Aidx_next, align 4 ; CHECK: %a = load float, float* %Aidx.float, align 4 %a = load float, float* %Aidx.float, align 4 -; CHECK-NEXT: %c = fmul float %a, 2.0 +; CHECK-NEXT: %c = fmul float %store_forwarded, 2.0 %c = fmul float %a, 2.0 %c.int = fptosi float %c to i32 store i32 %c.int, i32* %Cidx, align 4 @@ -43,7 +45,8 @@ ret void } -; Don't crash if the store and the load use different types. +; If the store and the load use different types, but have the same +; size then we should still be able to forward the value. ; ; for (unsigned i = 0; i < 100; i++) { ; A[i+1] = B[i] + 2; @@ -69,6 +72,7 @@ %b = load i32, i32* %Bidx, align 4 %a_p2 = add i32 %b, 2 +; CHECK: %store_forward_cast = bitcast i32 %a_p3 to float store i32 %a_p2, i32* %Aidx_next, align 4 %a_p3 = add i32 %b, 3 @@ -76,7 +80,7 @@ ; CHECK: %a = load float, float* %Aidx.float, align 4 %a = load float, float* %Aidx.float, align 4 -; CHECK-NEXT: %c = fmul float %a, 2.0 +; CHECK-NEXT: %c = fmul float %store_forwarded, 2.0 %c = fmul float %a, 2.0 %c.int = fptosi float %c to i32 store i32 %c.int, i32* %Cidx, align 4 @@ -87,3 +91,40 @@ for.end: ; preds = %for.body ret void } + +; Check that we don't forward between pointer-sized integers and actual +; pointers. We could potentially do this in the future, but LLVM reports a +; primitive size of 0 bits for pointer types. 
+; CHECK-LABEL: @f3( +; CHECK-NOT: store_forwarded +define void @f3(i64* noalias %A, i64* noalias %B, i64* noalias %C, i64 %N) { + +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + + %Aidx_next = getelementptr inbounds i64, i64* %A, i64 %indvars.iv.next + %Bidx = getelementptr inbounds i64, i64* %B, i64 %indvars.iv + %Cidx = getelementptr inbounds i64, i64* %C, i64 %indvars.iv + %Aidx = getelementptr inbounds i64, i64* %A, i64 %indvars.iv + %Aidx.i8p = bitcast i64* %Aidx to i8** + + %b = load i64, i64* %Bidx, align 8 + %a_p1 = add i64 %b, 2 + store i64 %a_p1, i64* %Aidx_next, align 8 + + %a = load i8*, i8** %Aidx.i8p, align 8 + %c = getelementptr i8, i8* %a, i64 57 + %c.i64p = ptrtoint i8* %c to i64 + store i64 %c.i64p, i64* %Cidx, align 8 + + %exitcond = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} +