Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -410,6 +410,27 @@ if (!canMergeBlocks(BB, DestBB)) continue; + // Skip merging if the unique predecessor of BB is terminated by a switch + // instruction and BB is used as an incoming block of a PHI in DestBB. In + // such a case, merging BB and DestBB would cause ISel to add COPY + // instructions in the header of the switch. This could potentially increase + // dynamic instructions, especially when the switch is in a loop. By keeping + // the empty block (BB), ISel will place COPY instructions in BB instead. + BasicBlock *Pred = BB->getUniquePredecessor(); + if (Pred && isa(Pred->getTerminator()) && + BI == BB->getFirstNonPHI()) { + bool IsIncomingBlock = false; + BasicBlock::const_iterator BBI = DestBB->begin(); + while (const PHINode *PN = dyn_cast(BBI++)) { + if (PN->getBasicBlockIndex(BB) >= 0) { + IsIncomingBlock = true; + break; + } + } + if (IsIncomingBlock) + continue; + } + // Do not delete loop preheaders if doing so would create a critical edge. // Loop preheaders can be good locations to spill registers. If the // preheader is deleted and we create a critical edge, registers may be Index: test/CodeGen/AArch64/aarch64-skip-merging-case-block.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64-skip-merging-case-block.ll @@ -0,0 +1,178 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +source_filename = "match.c.reduced.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + + +; CHECK: bl checkNum +; CHECK: ldr x[[JT:[0-9]+]], [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3] +; CHECK-NOT: adrp +; CHECK-NOT: mov +; CHECK: br x[[JT]] + +; This test case is generated from the C code below. 
In current trunk, the +; unconditional branch from the case 58 to do_nref is merged in CodeGenPrepare, +; which causes ISel to add COPY instructions in the basic block for top_switch, +; resulting in MOV and ADRP instructions in the top of the switch. Skipping such +; merge will lead ISel to place COPY instructions in the case, not in the top of +; switch so that we avoid generating MOV and ADRP instructions in the top of +; switch. + +;int F1(const char* a, const char* b, int len) ; +;extern unsigned char PL1[]; +;extern const unsigned char PL2[]; +; +;typedef struct match_state { +; int state; +;} match_state; +; +;typedef struct node { +; unsigned short next; +;}node; +; +;typedef int (*foldFP)(const char *, char const *, int); +; +;S_regmatchByJun(reginfo) { +; match_state *st; +; node *scan; +; node *next; +; long int ln; +; char locinput; +; unsigned int state_num; +; while (scan) { +; next = ((scan)->next); +; top_switch: +; reginfo ? : checkNum(); +; switch (state_num) { +; case 27: +; case27Call(); +; case 58: { +; char s; +; char type; +; foldFP folder; +; unsigned char *FA; +; folder = F1; +; FA= PL1; +; type = 53; +; goto do_nref; +; case 57: +; folder = FA = PL2; +; case 56: +; type = 51; +; do_nref: +; dummyFunc1(scan); +; if (type != FA) +; ln = folder(s, locinput, ln); +; break; +; } +; case (93 + 3): goto yes; +; } +; scan = next; +; match_state * newst = st + 1; +; st = newst; +; } +; goto no; +;yes: +; state_num = st; +; goto top_switch; +;no:; +;} + + +%struct.match_state = type { i32 } +%struct.node = type { i16 } + +@PL1 = external global [0 x i8], align 1 +@PL2 = external constant [0 x i8], align 1 + +; Function Attrs: nounwind +define i32 @S_regmatchByJun(i32 %reginfo) local_unnamed_addr #0 { +while.body.lr.ph: + br label %while.body.us + +while.body.us: ; preds = %while.body.lr.ph, %sw.epilog.us + %st.048.us = phi %struct.match_state* [ %add.ptr.us, %sw.epilog.us ], [ undef, %while.body.lr.ph ] + %FA.046.us = phi i8* [ %FA.3.us, 
%sw.epilog.us ], [ undef, %while.body.lr.ph ] + %folder.044.us = phi i32 (i8*, i8*, i32)* [ %folder.3.us, %sw.epilog.us ], [ undef, %while.body.lr.ph ] + %scan.043.us = phi %struct.node* [ %1, %sw.epilog.us ], [ undef, %while.body.lr.ph ] + %state_num.042.us = phi i32 [ %state_num.1.us.us70, %sw.epilog.us ], [ undef, %while.body.lr.ph ] + %ln.035.us = phi i64 [ %ln.2.us, %sw.epilog.us ], [ undef, %while.body.lr.ph ] + %next1.us = getelementptr inbounds %struct.node, %struct.node* %scan.043.us, i64 0, i32 0 + %0 = load i16, i16* %next1.us, align 2 + %conv.us = zext i16 %0 to i64 + %1 = inttoptr i64 %conv.us to %struct.node* + %2 = ptrtoint %struct.match_state* %st.048.us to i64 + %3 = trunc i64 %2 to i32 + br label %top_switch.us.us + +do_nref.us.loopexit: ; preds = %top_switch.us.us + br label %do_nref.us + +do_nref.us: ; preds = %do_nref.us.loopexit, %sw.bb5.us-lcssa.us.us, %sw.bb6.loopexit.us-lcssa.us.us, %sw.bb.us-lcssa.us.us + %state_num.1.us.us71 = phi i32 [ 27, %sw.bb.us-lcssa.us.us ], [ %state_num.1.us.us, %sw.bb5.us-lcssa.us.us ], [ %state_num.1.us.us, %sw.bb6.loopexit.us-lcssa.us.us ], [ 58, %do_nref.us.loopexit ] + %type.0.us = phi i8* [ inttoptr (i64 53 to i8*), %sw.bb.us-lcssa.us.us ], [ inttoptr (i64 51 to i8*), %sw.bb5.us-lcssa.us.us ], [ inttoptr (i64 51 to i8*), %sw.bb6.loopexit.us-lcssa.us.us ], [ inttoptr (i64 53 to i8*), %do_nref.us.loopexit ] + %folder.2.us = phi i32 (i8*, i8*, i32)* [ @F1, %sw.bb.us-lcssa.us.us ], [ bitcast ([0 x i8]* @PL2 to i32 (i8*, i8*, i32)*), %sw.bb5.us-lcssa.us.us ], [ %folder.044.us, %sw.bb6.loopexit.us-lcssa.us.us ], [ @F1, %do_nref.us.loopexit ] + %FA.2.us = phi i8* [ getelementptr inbounds ([0 x i8], [0 x i8]* @PL1, i64 0, i64 0), %sw.bb.us-lcssa.us.us ], [ getelementptr inbounds ([0 x i8], [0 x i8]* @PL2, i64 0, i64 0), %sw.bb5.us-lcssa.us.us ], [ %FA.046.us, %sw.bb6.loopexit.us-lcssa.us.us ], [ getelementptr inbounds ([0 x i8], [0 x i8]* @PL1, i64 0, i64 0), %do_nref.us.loopexit ] + %call7.us = tail call i32 
bitcast (i32 (...)* @dummyFunc1 to i32 (%struct.node*)*)(%struct.node* nonnull %scan.043.us) #2 + %cmp.us = icmp eq i8* %type.0.us, %FA.2.us + br i1 %cmp.us, label %sw.epilog.us, label %if.then.us + +if.then.us: ; preds = %do_nref.us + %conv13.us = trunc i64 %ln.035.us to i32 + %call14.us = tail call i32 %folder.2.us(i8* null, i8* null, i32 %conv13.us) #2 + %conv15.us = sext i32 %call14.us to i64 + br label %sw.epilog.us + +sw.epilog.us.loopexit: ; preds = %top_switch.us.us + br label %sw.epilog.us + +sw.epilog.us: ; preds = %sw.epilog.us.loopexit, %if.then.us, %do_nref.us + %state_num.1.us.us70 = phi i32 [ %state_num.1.us.us71, %if.then.us ], [ %state_num.1.us.us71, %do_nref.us ], [ %state_num.1.us.us, %sw.epilog.us.loopexit ] + %ln.2.us = phi i64 [ %conv15.us, %if.then.us ], [ %ln.035.us, %do_nref.us ], [ %ln.035.us, %sw.epilog.us.loopexit ] + %folder.3.us = phi i32 (i8*, i8*, i32)* [ %folder.2.us, %if.then.us ], [ %folder.2.us, %do_nref.us ], [ %folder.044.us, %sw.epilog.us.loopexit ] + %FA.3.us = phi i8* [ %FA.2.us, %if.then.us ], [ %type.0.us, %do_nref.us ], [ %FA.046.us, %sw.epilog.us.loopexit ] + %add.ptr.us = getelementptr inbounds %struct.match_state, %struct.match_state* %st.048.us, i64 1 + %tobool.us = icmp eq i16 %0, 0 + br i1 %tobool.us, label %no, label %while.body.us + +top_switch.us.us: ; preds = %top_switch.us.us, %while.body.us + %state_num.1.us.us = phi i32 [ %state_num.042.us, %while.body.us ], [ %3, %top_switch.us.us ] + %call.us.us = tail call i32 bitcast (i32 (...)* @checkNum to i32 ()*)() #2 + switch i32 %state_num.1.us.us, label %sw.epilog.us.loopexit [ + i32 27, label %sw.bb.us-lcssa.us.us + i32 58, label %do_nref.us.loopexit + i32 57, label %sw.bb5.us-lcssa.us.us + i32 56, label %sw.bb6.loopexit.us-lcssa.us.us + i32 96, label %top_switch.us.us + ] + +sw.bb.us-lcssa.us.us: ; preds = %top_switch.us.us + %call3.us = tail call i32 bitcast (i32 (...)* @case27Call to i32 ()*)() #2 + br label %do_nref.us + +sw.bb5.us-lcssa.us.us: ; preds = 
%top_switch.us.us + br label %do_nref.us + +sw.bb6.loopexit.us-lcssa.us.us: ; preds = %top_switch.us.us + br label %do_nref.us + +no: ; preds = %sw.epilog.us + ret i32 undef +} + +declare i32 @checkNum(...) local_unnamed_addr #1 + +declare i32 @case27Call(...) local_unnamed_addr #1 + +declare i32 @F1(i8*, i8*, i32) local_unnamed_addr #1 + +declare i32 @dummyFunc1(...) local_unnamed_addr #1 + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="kryo" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="kryo" "target-features"="+crc,+crypto,+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.9.0 (trunk 275756) (llvm/trunk 275801)"}