Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -1924,9 +1924,21 @@ SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); - if (!DereferenceableInPH) - DereferenceableInPH = isSafeToExecuteUnconditionally( - *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); + unsigned InstAlignment = Load->getAlignment(); + if (!InstAlignment) + InstAlignment = + MDL.getABITypeAlignment(Load->getType()); + + // Note that proving a load safe to speculate requires proving + // sufficient alignment at the target location. Proving it guaranteed + // to execute does as well. Thus we can increase our guaranteed + // alignment as well. + if (!DereferenceableInPH || (InstAlignment > Alignment)) + if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo, + ORE, Preheader->getTerminator())) { + DereferenceableInPH = true; + Alignment = std::max(Alignment, InstAlignment); + } } else if (const StoreInst *Store = dyn_cast(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. @@ -1997,6 +2009,14 @@ if (SawUnorderedAtomic && SawNotAtomic) return false; + // If we're inserting an atomic load in the preheader, we must be able to + // lower it. We're only guaranteed to be able to lower naturally aligned + // atomics. + auto *SomePtrElemType = SomePtr->getType()->getPointerElementType(); + if (SawUnorderedAtomic && + Alignment < MDL.getTypeStoreSize(SomePtrElemType)) + return false; + // If we couldn't prove we can hoist the load, bail. 
if (!DereferenceableInPH) return false; Index: test/Transforms/LICM/promote-tls.ll =================================================================== --- test/Transforms/LICM/promote-tls.ll +++ test/Transforms/LICM/promote-tls.ll @@ -22,7 +22,7 @@ for.body.lr.ph: ; preds = %entry ; CHECK-LABEL: for.body.lr.ph: -; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1 +; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4 br label %for.header for.header: @@ -35,7 +35,7 @@ early-exit: ; CHECK-LABEL: early-exit: -; CHECK: store i32 %new1.lcssa, i32* %addr, align 1 +; CHECK: store i32 %new1.lcssa, i32* %addr, align 4 ret i32* null for.body: @@ -47,7 +47,7 @@ for.cond.for.end_crit_edge: ; preds = %for.body ; CHECK-LABEL: for.cond.for.end_crit_edge: -; CHECK: store i32 %new.lcssa, i32* %addr, align 1 +; CHECK: store i32 %new.lcssa, i32* %addr, align 4 %split = phi i32* [ %addr, %for.body ] ret i32* null } @@ -62,7 +62,7 @@ for.body.lr.ph: ; preds = %entry ; CHECK-LABEL: for.body.lr.ph: -; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 1 +; CHECK-NEXT: %addr.promoted = load i32, i32* %addr, align 4 br label %for.header for.header: @@ -75,7 +75,7 @@ early-exit: ; CHECK-LABEL: early-exit: -; CHECK: store i32 %new1.lcssa, i32* %addr, align 1 +; CHECK: store i32 %new1.lcssa, i32* %addr, align 4 ret i32* null for.body: @@ -87,7 +87,7 @@ for.cond.for.end_crit_edge: ; preds = %for.body ; CHECK-LABEL: for.cond.for.end_crit_edge: -; CHECK: store i32 %new.lcssa, i32* %addr, align 1 +; CHECK: store i32 %new.lcssa, i32* %addr, align 4 %split = phi i32* [ %addr, %for.body ] ret i32* null } Index: test/Transforms/LICM/scalar-promote-unwind.ll =================================================================== --- test/Transforms/LICM/scalar-promote-unwind.ll +++ test/Transforms/LICM/scalar-promote-unwind.ll @@ -74,7 +74,7 @@ entry: ; CHECK-LABEL: entry: ; CHECK-NEXT: %a = alloca i32 -; CHECK-NEXT: %a.promoted = load i32, i32* %a, align 1 +; CHECK-NEXT: %a.promoted 
= load i32, i32* %a, align 4 %a = alloca i32 br label %for.body @@ -90,20 +90,18 @@ for.cond.cleanup: ; CHECK-LABEL: for.cond.cleanup: -; CHECK: store i32 %add.lcssa, i32* %a, align 1 +; CHECK: store i32 %add.lcssa, i32* %a, align 4 ; CHECK-NEXT: ret void ret void } ;; Same as test3, but with unordered atomics -;; FIXME: doing the transform w/o alignment here is wrong since we're -;; creating an unaligned atomic which we may not be able to lower. define void @test3b(i1 zeroext %y) uwtable { ; CHECK-LABEL: @test3 entry: ; CHECK-LABEL: entry: ; CHECK-NEXT: %a = alloca i32 -; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 1 +; CHECK-NEXT: %a.promoted = load atomic i32, i32* %a unordered, align 4 %a = alloca i32 br label %for.body @@ -119,7 +117,7 @@ for.cond.cleanup: ; CHECK-LABEL: for.cond.cleanup: -; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 1 +; CHECK: store atomic i32 %add.lcssa, i32* %a unordered, align 4 ; CHECK-NEXT: ret void ret void }