Index: llvm/include/llvm/Transforms/Utils/Cloning.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Cloning.h +++ llvm/include/llvm/Transforms/Utils/Cloning.h @@ -75,7 +75,16 @@ /// originally inserted callsites were DCE'ed after they were cloned. std::vector OperandBundleCallSites; + /// Like VMap, but maps only unsimplified instructions. Values in the map + /// may be dangling; it is only intended to be used via isSimplified(), to + /// check whether the main VMap mapping involves simplification or not. + DenseMap OrigVMap; + ClonedCodeInfo() = default; + + bool isSimplified(const Value *From, const Value *To) const { + return OrigVMap.lookup(From) != To; + } }; /// Return a copy of the specified basic block, but without Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -412,10 +412,12 @@ NewBB->getInstList().push_back(NewInst); hasCalls |= (isa(II) && !isa(II)); - if (CodeInfo) + if (CodeInfo) { + CodeInfo->OrigVMap[&*II] = NewInst; if (auto *CB = dyn_cast(&*II)) if (CB->hasOperandBundles()) CodeInfo->OperandBundleCallSites.push_back(NewInst); + } if (const AllocaInst *AI = dyn_cast(II)) { if (isa(AI->getArraySize())) @@ -469,10 +471,12 @@ NewBB->getInstList().push_back(NewInst); VMap[OldTI] = NewInst; // Add instruction map to value. - if (CodeInfo) + if (CodeInfo) { + CodeInfo->OrigVMap[OldTI] = NewInst; if (auto *CB = dyn_cast(OldTI)) if (CB->hasOperandBundles()) CodeInfo->OperandBundleCallSites.push_back(NewInst); + } // Recursively clone any reachable successor blocks. 
append_range(ToClone, successors(BB->getTerminator())); Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -939,7 +939,8 @@ /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, - const DataLayout &DL, AAResults *CalleeAAR) { + const DataLayout &DL, AAResults *CalleeAAR, + ClonedCodeInfo &InlinedFunctionInfo) { if (!EnableNoAliasConversion) return; @@ -1009,7 +1010,7 @@ continue; Instruction *NI = dyn_cast(VMI->second); - if (!NI) + if (!NI || InlinedFunctionInfo.isSimplified(I, NI)) continue; bool IsArgMemOnlyCall = false, IsFuncCall = false; @@ -2037,7 +2038,7 @@ SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); + AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); // Clone return attributes on the callsite into the calls within the inlined // function which feed into its return value. Index: llvm/test/Transforms/Inline/pr50589.ll =================================================================== --- llvm/test/Transforms/Inline/pr50589.ll +++ llvm/test/Transforms/Inline/pr50589.ll @@ -15,10 +15,10 @@ ret <2 x i8> %ret } -; TODO: The load should not have !noalias. +; The load should not have !noalias. 
define void @caller1(<2 x i8>* %ptr1, <2 x i8>* %ptr2) { ; CHECK-LABEL: @caller1( -; CHECK-NEXT: [[PASSTHRU:%.*]] = load <2 x i8>, <2 x i8>* [[PTR2:%.*]], align 2, !noalias !0 +; CHECK-NEXT: [[PASSTHRU:%.*]] = load <2 x i8>, <2 x i8>* [[PTR2:%.*]], align 2{{$}} ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: store <2 x i8> zeroinitializer, <2 x i8>* [[PTR2]], align 2, !alias.scope !0 ; CHECK-NEXT: ret void @@ -41,11 +41,11 @@ ret <2 x i8> %ret } -; TODO: The load should not have !noalias. +; The load should not have !noalias. define void @caller2(<2 x i8>* %ptr1, <2 x i8>* %ptr2) { ; CHECK-LABEL: @caller2( ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) -; CHECK-NEXT: [[PASSTHRU_I:%.*]] = load <2 x i8>, <2 x i8>* [[PTR2:%.*]], align 2, !alias.scope !3, !noalias !3 +; CHECK-NEXT: [[PASSTHRU_I:%.*]] = load <2 x i8>, <2 x i8>* [[PTR2:%.*]], align 2, !alias.scope !3{{$}} ; CHECK-NEXT: store <2 x i8> zeroinitializer, <2 x i8>* [[PTR2]], align 2, !alias.scope !3 ; CHECK-NEXT: ret void ;