Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -2860,7 +2860,8 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, - bool InvertPCond, bool InvertQCond) { + bool InvertPCond, bool InvertQCond, + const DataLayout &DL) { auto IsaBitcastOfPointerType = [](const Instruction &I) { return Operator::getOpcode(&I) == Instruction::BitCast && I.getType()->isPointerTy(); @@ -2966,6 +2967,29 @@ PStore->getAAMetadata(AAMD, /*Merge=*/false); PStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + unsigned PAlignment = PStore->getAlignment(); + unsigned QAlignment = QStore->getAlignment(); + unsigned TypeAlignment = + DL.getABITypeAlignment(SI->getValueOperand()->getType()); + unsigned MinAlignment; + unsigned MaxAlignment; + std::tie(MinAlignment, MaxAlignment) = std::minmax(PAlignment, QAlignment); + // Choose the minimum alignment. If we could prove both stores execute, we + // could use biggest one. In this case, though, we only know that one of the + // stores executes. And we don't know it's safe to take the alignment from a + // store that doesn't execute. + if (MinAlignment != 0) { + // Choose the minimum of all non-zero alignments. + SI->setAlignment(MinAlignment); + } else if (MaxAlignment != 0) { + // Choose the minimal alignment between the non-zero alignment and the ABI + // default alignment for the type of the stored value. + SI->setAlignment(std::min(MaxAlignment, TypeAlignment)); + } else { + // If both alignments are zero, use ABI default alignment for the type of + // the stored value. + SI->setAlignment(TypeAlignment); + } QStore->eraseFromParent(); PStore->eraseFromParent(); @@ -2973,7 +2997,8 @@ return true; } -static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, + const DataLayout &DL) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may @@ -3076,7 +3101,7 @@ bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); return Changed; } @@ -3141,7 +3166,7 @@ // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) return true; // If this is a conditional branch in an empty block, and if any @@ -5829,7 +5854,7 @@ if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI)) + if (mergeConditionalStores(PBI, BI, DL)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; return false; Index: test/Transforms/SimplifyCFG/preserve-store-alignment.ll =================================================================== --- test/Transforms/SimplifyCFG/preserve-store-alignment.ll +++ test/Transforms/SimplifyCFG/preserve-store-alignment.ll @@ -25,7 +25,7 @@ ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*) +; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: ret i32 0 ; @@ -77,7 +77,7 @@ ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*) +; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: ret i32 0 ; @@ -129,7 +129,7 @@ ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*) +; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*), align 8 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: ret i32 0 ; @@ -181,7 +181,7 @@ ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*) +; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*), align 16 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: ret i32 0 ; @@ -233,7 +233,7 @@ ; CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP10:%.*]] -; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*) +; CHECK: store <2 x i64> [[DOT]], <2 x i64>* bitcast (i64* getelementptr inbounds (%struct.Counters, %struct.Counters* @counters, i64 0, i32 1) to <2 x i64>*), align 16 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: ret i32 0 ;