Index: llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
+++ llvm/trunk/lib/Transforms/Scalar/JumpThreading.cpp
@@ -924,8 +924,8 @@
 /// important optimization that encourages jump threading, and needs to be run
 /// interlaced with other jump threading tasks.
 bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
-  // Don't hack volatile/atomic loads.
-  if (!LI->isSimple()) return false;
+  // Don't hack volatile and ordered loads.
+  if (!LI->isUnordered()) return false;
 
   // If the load is defined in a block with exactly one predecessor, it can't be
   // partially redundant.
@@ -1055,9 +1055,10 @@
   if (UnavailablePred) {
     assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
            "Can't handle critical edge here!");
-    LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
-                                    LI->getAlignment(),
-                                    UnavailablePred->getTerminator());
+    LoadInst *NewVal =
+        new LoadInst(LoadedPtr, LI->getName() + ".pr", false,
+                     LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(),
+                     UnavailablePred->getTerminator());
     NewVal->setDebugLoc(LI->getDebugLoc());
     if (AATags)
       NewVal->setAAMetadata(AATags);
Index: llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll
===================================================================
--- llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll
+++ llvm/trunk/test/Transforms/JumpThreading/thread-loads.ll
@@ -107,6 +107,145 @@
   ret i32 13
 }
 
+define i32 @test4(i32* %P) {
+; CHECK-LABEL: @test4(
+entry:
+  %v0 = tail call i32 (...) @f1()
+  %v1 = icmp eq i32 %v0, 0
+  br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb1.thread:
+; CHECK: store atomic
+; CHECK: br label %bb3
+  store atomic i32 42, i32* %P unordered, align 4
+  br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NOT: phi
+; CHECK: load atomic
+  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+  %v2 = load atomic i32, i32* %P unordered, align 4
+  %v3 = icmp sgt i32 %v2, 36
+  br i1 %v3, label %bb3, label %bb2
+
+bb2:
+  %v4 = tail call i32 (...) @f2()
+  ret i32 %res.0
+
+bb3:
+  ret i32 %res.0
+}
+
+define i32 @test5(i32* %P) {
+; Negative test: the acquire load in bb1 is ordered and must not be touched.
+
+; CHECK-LABEL: @test5(
+entry:
+  %v0 = tail call i32 (...) @f1()
+  %v1 = icmp eq i32 %v0, 0
+  br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb:
+; CHECK-NEXT:   store atomic i32 42, i32* %P release, align 4
+; CHECK-NEXT:   br label %bb1
+  store atomic i32 42, i32* %P release, align 4
+  br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
+; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
+; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2
+
+  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+  %v2 = load atomic i32, i32* %P acquire, align 4
+  %v3 = icmp sgt i32 %v2, 36
+  br i1 %v3, label %bb3, label %bb2
+
+bb2:
+  %v4 = tail call i32 (...) @f2()
+  ret i32 %res.0
+
+bb3:
+  ret i32 %res.0
+}
+
+define i32 @test6(i32* %P) {
+; Negative test: acquire load in bb1 must stay put despite the plain store in bb.
+
+; CHECK-LABEL: @test6(
+entry:
+  %v0 = tail call i32 (...) @f1()
+  %v1 = icmp eq i32 %v0, 0
+  br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb:
+; CHECK-NEXT:   store i32 42, i32* %P
+; CHECK-NEXT:   br label %bb1
+  store i32 42, i32* %P
+  br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
+; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
+; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2
+
+  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+  %v2 = load atomic i32, i32* %P acquire, align 4
+  %v3 = icmp sgt i32 %v2, 36
+  br i1 %v3, label %bb3, label %bb2
+
+bb2:
+  %v4 = tail call i32 (...) @f2()
+  ret i32 %res.0
+
+bb3:
+  ret i32 %res.0
+}
+
+define i32 @test7(i32* %P) {
+; Negative test: acquire load in bb1 must stay put despite the plain load in bb.
+
+; CHECK-LABEL: @test7(
+entry:
+  %v0 = tail call i32 (...) @f1()
+  %v1 = icmp eq i32 %v0, 0
+  br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb:
+; CHECK-NEXT:   %val = load i32, i32* %P
+; CHECK-NEXT:   br label %bb1
+  %val = load i32, i32* %P
+  br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NEXT:  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+; CHECK-NEXT:  %v2 = load atomic i32, i32* %P acquire, align 4
+; CHECK-NEXT:  %v3 = icmp sgt i32 %v2, 36
+; CHECK-NEXT:  br i1 %v3, label %bb3, label %bb2
+
+  %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+  %v2 = load atomic i32, i32* %P acquire, align 4
+  %v3 = icmp sgt i32 %v2, 36
+  br i1 %v3, label %bb3, label %bb2
+
+bb2:
+  %v4 = tail call i32 (...) @f2()
+  ret i32 %res.0
+
+bb3:
+  ret i32 %res.0
+}
+
 !0 = !{!3, !3, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA", null}