Index: lib/Transforms/Scalar/LoopRerollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopRerollPass.cpp
+++ lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -739,11 +739,11 @@
     collectInLoopUserSet(Root, Exclude, Final, Users);
 }
 
-static bool isSimpleLoadStore(Instruction *I) {
+static bool isUnorderedLoadStore(Instruction *I) {
   if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->isSimple();
+    return LI->isUnordered();
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->isSimple();
+    return SI->isUnordered();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
     return !MI->isVolatile();
   return false;
@@ -1283,7 +1283,7 @@
       // which while a valid (somewhat arbitrary) micro-optimization, is
       // needed because otherwise isSafeToSpeculativelyExecute returns
       // false on PHI nodes.
-      if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
+      if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
           !isSafeToSpeculativelyExecute(I))
         // Intervening instructions cause side effects.
         FutureSideEffects = true;
@@ -1313,10 +1313,10 @@
       // If we've past an instruction from a future iteration that may have
       // side effects, and this instruction might also, then we can't reorder
      // them, and this matching fails. As an exception, we allow the alias
-      // set tracker to handle regular (simple) load/store dependencies.
-      if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
+      // set tracker to handle regular (unordered) load/store dependencies.
+      if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
                                  !isSafeToSpeculativelyExecute(BaseInst)) ||
-                                (!isSimpleLoadStore(RootInst) &&
+                                (!isUnorderedLoadStore(RootInst) &&
                                  !isSafeToSpeculativelyExecute(RootInst)))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
                        " vs. " << *RootInst <<
Index: test/Transforms/LoopReroll/basic.ll
===================================================================
--- test/Transforms/LoopReroll/basic.ll
+++ test/Transforms/LoopReroll/basic.ll
@@ -576,6 +576,105 @@
 }
 
+define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
+; CHECK-LABEL: @unordered_atomic_ops(
+
+; CHECK: for.body:
+; CHECK-NEXT:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK-NEXT:   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvar
+; CHECK-NEXT:   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvar
+; CHECK-NEXT:   %va = load atomic i32, i32* %buf0_a unordered, align 4
+; CHECK-NEXT:   store atomic i32 %va, i32* %buf1_a unordered, align 4
+; CHECK-NEXT:   %indvar.next = add i32 %indvar, 1
+; CHECK-NEXT:   %exitcond = icmp eq i32 %indvar, 3199
+; CHECK-NEXT:   br i1 %exitcond, label %for.end, label %for.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  store atomic i32 %va, i32* %buf1_a unordered, align 4
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @unordered_atomic_ops_nomatch(i32* noalias %buf_0, i32* noalias %buf_1) {
+; Negative test
+
+; CHECK-LABEL: @unordered_atomic_ops_nomatch(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
+; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
+; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
+; CHECK:   br i1 %cmp, label %for.body, label %for.end
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  store i32 %va, i32* %buf1_a, align 4  ;; Not atomic
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @ordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
+; Negative test
+
+; CHECK-LABEL: @ordered_atomic_ops(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
+; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
+; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
+; CHECK:   br i1 %cmp, label %for.body, label %for.end
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a acquire, align 4
+  %vb = load atomic i32, i32* %buf0_b acquire, align 4
+  store atomic i32 %va, i32* %buf1_a release, align 4
+  store atomic i32 %vb, i32* %buf1_b release, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind }