diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -747,10 +747,13 @@
   /// sometimes useful to ignore certain attributes.
   enum OperationEquivalenceFlags {
     /// Check for equivalence ignoring load/store alignment.
-    CompareIgnoringAlignment = 1<<0,
+    CompareIgnoringAlignment = 1 << 0,
     /// Check for equivalence treating a type and a vector of that type
     /// as equivalent.
-    CompareUsingScalarTypes = 1<<1
+    CompareUsingScalarTypes = 1 << 1,
+    /// Check for equivalence ignoring operand types, particularly useful
+    /// for loads and stores (see commit 816d26fe5eeae4ad).
+    CompareIgnoringType = 1 << 2
   };
 
   /// This function determines if the specified instruction executes the same
@@ -763,6 +766,27 @@
   /// Determine if one instruction is the same operation as another.
   bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const LLVM_READONLY;
 
+  /// This function determines if the specified instruction executes the same
+  /// load/store operation as the current one. This means that the opcodes
+  /// and any other factors affecting the operation must be the same. This is
+  /// similar to isSameOperationAs, except it ignores types since memory does
+  /// not have an intrinsic type.
+  /// @returns true if the specified instruction is the same operation as
+  /// the current one.
+  /// Determine if one instruction is the same operation as another.
+  bool isSameLoadStoreOperationAs(const Instruction *I) const LLVM_READONLY;
+
+  /// This function determines if the specified instruction has a conflicting
+  /// value type with the current one. This means that the memory opcodes are
+  /// the same, but the value type is different. It is meant to be used as a
+  /// complement to isSameLoadStoreOperationAs.
+  /// @returns true if the specified instruction is the same memory operation
+  /// as the current one, but with a different value type.
+  /// Determine if one instruction is the same memory operation as another,
+  /// but with a different value type.
+  bool isLoadStoreConflictingType(const Instruction *I,
+                                  bool UseScalarTy = false) const LLVM_READONLY;
+
   /// Return true if there are any uses of this instruction in blocks other than
   /// the specified block. Note that PHI nodes are considered to evaluate their
   /// operands in the corresponding predecessor block.
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -387,6 +387,11 @@
            !isVolatile();
   }
 
+  /// Whether bitcasting the ValueOperand has no semantic effect. For example,
+  /// bitcasting an atomic store may cause complications, as backends may rely
+  /// on the exact type stored to select appropriate atomic operations.
+  bool isTriviallyBitcasted() const { return isUnordered() && !isAtomic(); }
+
   Value *getValueOperand() { return getOperand(0); }
   const Value *getValueOperand() const { return getOperand(0); }
 
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -586,26 +586,52 @@
                                     unsigned flags) const {
   bool IgnoreAlignment = flags & CompareIgnoringAlignment;
   bool UseScalarTypes = flags & CompareUsingScalarTypes;
+  bool IgnoreType = flags & CompareIgnoringType;
 
-  if (getOpcode() != I->getOpcode() ||
-      getNumOperands() != I->getNumOperands() ||
-      (UseScalarTypes ?
-       getType()->getScalarType() != I->getType()->getScalarType() :
-       getType() != I->getType()))
+  if (getOpcode() != I->getOpcode() || getNumOperands() != I->getNumOperands())
     return false;
 
   // We have two instructions of identical opcode and #operands.  Check to see
   // if all operands are the same type
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (UseScalarTypes ?
-        getOperand(i)->getType()->getScalarType() !=
-          I->getOperand(i)->getType()->getScalarType() :
-        getOperand(i)->getType() != I->getOperand(i)->getType())
+  if (!IgnoreType) {
+    if ((UseScalarTypes
+             ? getType()->getScalarType() != I->getType()->getScalarType()
+             : getType() != I->getType()))
       return false;
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+      if (UseScalarTypes
+              ? getOperand(i)->getType()->getScalarType() !=
+                    I->getOperand(i)->getType()->getScalarType()
+              : getOperand(i)->getType() != I->getOperand(i)->getType())
+        return false;
+  }
 
   return haveSameSpecialState(this, I, IgnoreAlignment);
 }
 
+bool Instruction::isSameLoadStoreOperationAs(const Instruction *I) const {
+  if (!isa<LoadInst>(this) && !isa<StoreInst>(this))
+    return false;
+
+  return isSameOperationAs(I, CompareIgnoringType);
+}
+
+bool Instruction::isLoadStoreConflictingType(const Instruction *I,
+                                             bool UseScalarTy) const {
+
+  if (getOpcode() != I->getOpcode())
+    return false;
+
+  if (!isa<LoadInst>(this) && !isa<StoreInst>(this))
+    return false;
+
+  auto ThisType = getLoadStoreType(const_cast<Instruction *>(this));
+  auto OtherType = getLoadStoreType(const_cast<Instruction *>(I));
+
+  return UseScalarTy ? ThisType->getScalarType() != OtherType->getScalarType()
+                     : ThisType != OtherType;
+}
+
 bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
   for (const Use &U : uses()) {
     // PHI nodes uses values in the corresponding predecessor block.  For other
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
 #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/Debug.h"
@@ -191,10 +192,11 @@
     MemoryLocation Loc0 = MemoryLocation::get(Store0);
     MemoryLocation Loc1 = MemoryLocation::get(Store1);
-    if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
+    if (AA->isMustAlias(Loc0, Loc1) &&
         !isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
         !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
-      return Store1;
+      if (Store0->isSameLoadStoreOperationAs(Store1))
+        return Store1;
     }
   }
   return nullptr;
 }
@@ -254,6 +256,14 @@
   S0->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
   S0->mergeDIAssignID(S1);
 
+  // Insert a bitcast for stores with conflicting value types.
+  if (S0->isLoadStoreConflictingType(S1) && S0->isTriviallyBitcasted()) {
+    IRBuilder<> Builder(S0);
+    auto Cast = Builder.CreateBitCast(S0->getValueOperand(),
+                                      S1->getValueOperand()->getType());
+    S0->setOperand(0, Cast);
+  }
+
   // Create the new store to be inserted at the join point.
   StoreInst *SNew = cast<StoreInst>(S0->clone());
   SNew->insertBefore(&*InsertPt);
diff --git a/llvm/test/Transforms/MergedLoadStoreMotion/st_sink_conflict_type.ll b/llvm/test/Transforms/MergedLoadStoreMotion/st_sink_conflict_type.ll
new file
--- /dev/null
+++ b/llvm/test/Transforms/MergedLoadStoreMotion/st_sink_conflict_type.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+define internal fastcc void @sink_conflict(ptr %this.64.val, half %val1, i16 %val2, i32 %val3) unnamed_addr align 2 {
+; CHECK-LABEL: define internal fastcc void @sink_conflict
+; CHECK-SAME: (ptr [[THIS_64_VAL:%.*]], half [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) unnamed_addr align 2 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT_NOT:%.*]] = icmp eq i32 [[VAL3]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT_NOT]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast half [[VAL1]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[VAL2]] to half
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[VAL2_SINK:%.*]] = phi i16 [ [[TMP0]], [[IF_THEN]] ], [ [[VAL2]], [[IF_ELSE]] ]
+; CHECK-NEXT:    [[VAL1_SINK:%.*]] = phi half [ [[TMP1]], [[IF_THEN]] ], [ [[VAL1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    store i16 [[VAL2_SINK]], ptr [[THIS_64_VAL]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS_64_VAL]], i64 16
+; CHECK-NEXT:    store half [[VAL1_SINK]], ptr [[TMP2]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp.not.not = icmp eq i32 %val3, 0
+  br i1 %cmp.not.not, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store half %val1, ptr %this.64.val, align 2
+  %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %this.64.val, i64 16
+  store i16 %val2, ptr %add.ptr.i.i.i.i, align 2
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %this.64.val, i64 16
+  store half %val1, ptr %add.ptr.i.i.i.i7, align 2
+  store i16 %val2, ptr %this.64.val, align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
new file
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/bitcast-store-branch.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -O3 -S < %s | FileCheck %s
+
+%struct.ss = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr }
+
+define internal void @phantomLoad(ptr %p, ptr %y, ptr %x) {
+entry:
+  %0 = load i32, ptr %x
+  store i32 %0, ptr %y
+  ret void
+}
+
+define ptr @parent(ptr align 8 dereferenceable(72) %f, half %val1, i16 %val2, i32 %val3) align 2 {
+; CHECK-LABEL: define nonnull ptr @parent
+; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], half [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[F]], i64 64
+; CHECK-NEXT:    [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VAL1]] to i16
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[VAL2]] to half
+; CHECK-NEXT:    [[VAL2_SINK_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[TMP1]], i16 [[VAL2]]
+; CHECK-NEXT:    [[VAL1_SINK_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], half [[TMP2]], half [[VAL1]]
+; CHECK-NEXT:    store i16 [[VAL2_SINK_I]], ptr [[F_VAL]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16
+; CHECK-NEXT:    store half [[VAL1_SINK_I]], ptr [[TMP3]], align 2
+; CHECK-NEXT:    ret ptr [[F]]
+;
+entry:
+  call void @badChild(ptr align 8 dereferenceable(72) %f, half %val1, i16 %val2, i32 %val3) #4
+  ret ptr %f
+}
+
+
+define internal void @badChild(ptr align 8 dereferenceable(72) %this, half %val1, i16 %val2, i32 %val3) align 2 {
+entry:
+  %othergep = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 2
+  %load0 = load ptr, ptr %othergep, align 8
+  %x = alloca i32
+  %y = alloca i32
+  call void @phantomLoad(ptr %load0, ptr %x, ptr %y)
+  %val1.cast = bitcast half %val1 to i16
+  %cmp.not.not = icmp eq i32 %val3, 0
+  br i1 %cmp.not.not, label %if.then, label %if.else
+if.then:                                          ; preds = %entry
+  %0 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8
+  %1 = load ptr, ptr %0, align 8
+  store i16 %val1.cast, ptr %1, align 2
+  %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %1, i64 16
+  store i16 %val2, ptr %add.ptr.i.i.i.i, align 2
+  br label %if.end
+if.else:                                          ; preds = %entry
+  %2 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8
+  %3 = load ptr, ptr %2, align 8
+  %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %3, i64 16
+  store i16 %val1.cast, ptr %add.ptr.i.i.i.i7, align 2
+  store i16 %val2, ptr %3, align 2
+  br label %if.end
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
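
Note (not part of the patch): the following is a minimal, self-contained C++ sketch of how the new queries are expected to behave once the changes above are applied. The demo function @f, the module setup, and the driver are illustrative assumptions only; the helpers isSameOperationAs, isSameLoadStoreOperationAs, and isLoadStoreConflictingType are the APIs touched by this patch.

// Sketch: build two stores to the same pointer with bit-equal width but
// different value types (half vs. i16) and compare them with the existing
// strict query and the new type-agnostic queries.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // void @f(ptr %p, half %h, i16 %i)
  auto *FnTy = FunctionType::get(
      Type::getVoidTy(Ctx),
      {PointerType::get(Ctx, 0), Type::getHalfTy(Ctx), Type::getInt16Ty(Ctx)},
      /*isVarArg=*/false);
  Function *F = Function::Create(FnTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);

  // Two stores to the same pointer with different value types.
  StoreInst *S0 = B.CreateStore(F->getArg(1), F->getArg(0)); // store half
  StoreInst *S1 = B.CreateStore(F->getArg(2), F->getArg(0)); // store i16
  B.CreateRetVoid();

  // The strict comparison still distinguishes the operand types...
  outs() << (S0->isSameOperationAs(S1) ? "true" : "false") << "\n"; // false
  // ...while the type-agnostic comparison treats them as the same memory
  // operation, and the conflict query reports the type mismatch that
  // MergedLoadStoreMotion now resolves by inserting a bitcast.
  outs() << (S0->isSameLoadStoreOperationAs(S1) ? "true" : "false") << "\n"; // true
  outs() << (S0->isLoadStoreConflictingType(S1) ? "true" : "false") << "\n"; // true
  return 0;
}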