diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1074,20 +1074,26 @@ Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + AAMDNodes AATags; + + if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) + return Changed; + + AATags = TheStore->getAAMetadata(); + for (Instruction *Store : Stores) + AATags = AATags.merge(Store->getAAMetadata()); + if (auto CI = dyn_cast<ConstantInt>(NumBytes)) + AATags = AATags.extendTo(CI->getZExtValue()); + else + AATags = AATags.extendTo(-1); + CallInst *NewCall; if (SplatValue) { - AAMDNodes AATags = TheStore->getAAMetadata(); - for (Instruction *Store : Stores) - AATags = AATags.merge(Store->getAAMetadata()); - if (auto CI = dyn_cast<ConstantInt>(NumBytes)) - AATags = AATags.extendTo(CI->getZExtValue()); - else - AATags = AATags.extendTo(-1); - NewCall = Builder.CreateMemSet( BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment), /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); - } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) { + } else { + assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; @@ -1105,8 +1111,17 @@ GV->setAlignment(Align(16)); Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); - } else - return Changed; + + // Set the TBAA info if present. 
+ if (AATags.TBAA) + NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); + + if (AATags.Scope) + NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope); + + if (AATags.NoAlias) + NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); + } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes="loop-idiom" < %s -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +target triple = "x86_64-apple-darwin10.0.0" + + +define dso_local void @double_memset(ptr nocapture %p) { +; CHECK-LABEL: @double_memset( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 128), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07 + store double 3.14159e+00, ptr %ptr1, align 1, !tbaa !5 + %inc = add nuw nsw i64 %i.07, 1 + %exitcond.not = icmp eq i64 %inc, 16 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + + 
+define dso_local void @struct_memset(ptr nocapture %p) { +; CHECK-LABEL: @struct_memset( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 128), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07 + store double 3.14159e+00, ptr %ptr1, align 1, !tbaa !10 + %inc = add nuw nsw i64 %i.07, 1 + %exitcond.not = icmp eq i64 %inc, 16 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define dso_local void @var_memset(ptr nocapture %p, i64 %len) { +; CHECK-LABEL: @var_memset( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i64 [[LEN:%.*]], 3 +; CHECK-NEXT: call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.2, i64 [[TMP0]]) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[LEN]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i.07 = phi i64 [ %inc, %for.body ], [ 0, 
%entry ] + %ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07 + store double 3.14159e+00, ptr %ptr1, align 1, !tbaa !10 + %inc = add nuw nsw i64 %i.07, 1 + %exitcond.not = icmp eq i64 %inc, %len + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +!5 = !{!6, !6, i64 0} +!6 = !{!"double", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} + +!15 = !{!8, i64 0, !"omnipotent char"} +!17 = !{!15, i64 8, !"double"} +!9 = !{!15, i64 32, !"_ZTS1A", !17, i64 0, i64 8, !17, i64 8, i64 8, !17, i64 16, i64 8, !17, i64 24, i64 8} +!10 = !{!9, !17, i64 0, i64 1} + +!18 = !{!19, !20, i64 0} +!19 = !{!"A", !20, i64 0, !22, i64 8} +!20 = !{!"any pointer", !7, i64 0} +!21 = !{!22, !20, i64 0} +!22 = !{!"B", !20, i64 0}