Index: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -365,7 +365,7 @@ // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass()); + addPass(createLateCFGSimplificationPass()); // Run LoopDataPrefetch // Index: llvm/trunk/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ llvm/trunk/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* -; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* +; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] Index: llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ llvm/trunk/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -91,3 +91,63 @@ declare void @bar() declare void @baz() + +define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) { +; CHECK-LABEL: test_conditional2: +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x19] +; CHECK: cmp [[LOADED]], w21 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], w20, [x19] +; CHECK: cbnz [[STATUS]], [[LOOP]] +; CHECK: orr [[STATUS]], wzr, #0x1 +; CHECK: b 
[[PH:LBB[0-9]+_[0-9]+]] + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} + +; Verify that the loop preheader is simplified by the late SimplifyCFG pass (latesimplifycfg). +; CHECK: [[PH]]: +; CHECK: orr w22, wzr, #0x2 +; CHECK-NOT: orr w22, wzr, #0x4 +; CHECK-NOT: cmn w22, #4 +; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]] +; CHECK-NOT: b.ne [[LOOP2]] +; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}} +; CHECK: bl _foo +entry: + %pair = cmpxchg i32* %c, i32 %a, i32 %b seq_cst seq_cst + %success = extractvalue { i32, i1 } %pair, 1 + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ %success, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + tail call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ false, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo()