Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -3046,6 +3046,42 @@
     return true;
   }
 
+  void fixLoadStoreAlign(Instruction &Root) {
+    // This algorithm implements the same visitor loop as
+    // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+    // or store found.
+    SmallPtrSet<Instruction *, 4> Visited;
+    SmallVector<Instruction *, 4> Uses;
+    Visited.insert(&Root);
+    Uses.push_back(&Root);
+    do {
+      Instruction *I = Uses.pop_back_val();
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        unsigned LoadAlign = LI->getAlignment();
+        if (!LoadAlign)
+          LoadAlign = DL.getABITypeAlignment(LI->getType());
+        LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+        continue;
+      }
+      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        unsigned StoreAlign = SI->getAlignment();
+        if (!StoreAlign) {
+          Value *Op = SI->getOperand(0);
+          StoreAlign = DL.getABITypeAlignment(Op->getType());
+        }
+        SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+        continue;
+      }
+
+      assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+             isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+      for (User *U : I->users())
+        if (Visited.insert(cast<Instruction>(U)).second)
+          Uses.push_back(cast<Instruction>(U));
+    } while (!Uses.empty());
+  }
+
   bool visitPHINode(PHINode &PN) {
     LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
     assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
@@ -3069,6 +3105,9 @@
     LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
     deleteIfTriviallyDead(OldPtr);
 
+    // Fix the alignment of any loads or stores using this PHI node.
+    fixLoadStoreAlign(PN);
+
     // PHIs can't be promoted on their own, but often can be speculated. We
     // check the speculation outside of the rewriter so that we see the
     // fully-rewritten alloca.
@@ -3093,6 +3132,9 @@
     LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
     deleteIfTriviallyDead(OldPtr);
 
+    // Fix the alignment of any loads or stores using this select.
+    fixLoadStoreAlign(SI);
+
     // Selects can't be promoted on their own, but often can be speculated. We
     // check the speculation outside of the rewriter so that we see the
     // fully-rewritten alloca.
Index: test/Transforms/SROA/phi-and-select.ll
===================================================================
--- test/Transforms/SROA/phi-and-select.ll
+++ test/Transforms/SROA/phi-and-select.ll
@@ -600,3 +600,35 @@
   store %struct.S undef, %struct.S* %f1, align 4
   ret void
 }
+
+define i32 @phi_align(i32* %z) {
+; CHECK-LABEL: @phi_align(
+entry:
+  %a = alloca [8 x i8], align 8
+; CHECK: alloca [7 x i8]
+
+  %a0x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 1
+  %a0 = bitcast i8* %a0x to i32*
+  %a1x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 4
+  %a1 = bitcast i8* %a1x to i32*
+; CHECK: store i32 0, {{.*}}, align 1
+  store i32 0, i32* %a0, align 1
+; CHECK: store i32 1, {{.*}}, align 1
+  store i32 1, i32* %a1, align 4
+; CHECK: load {{.*}}, align 1
+  %v0 = load i32, i32* %a0, align 1
+; CHECK: load {{.*}}, align 1
+  %v1 = load i32, i32* %a1, align 4
+  %cond = icmp sle i32 %v0, %v1
+  br i1 %cond, label %then, label %exit
+
+then:
+  br label %exit
+
+exit:
+; CHECK: %phi = phi i32* [ {{.*}}, %then ], [ %z, %entry ]
+; CHECK-NEXT: %result = load i32, i32* %phi, align 1
+  %phi = phi i32* [ %a1, %then ], [ %z, %entry ]
+  %result = load i32, i32* %phi, align 4
+  ret i32 %result
+}