diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -399,6 +399,8 @@
     bool optimizeFunnelShift(IntrinsicInst *Fsh);
     bool optimizeSelectInst(SelectInst *SI);
     bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
+    bool optimizeSwitchType(SwitchInst *SI);
+    bool optimizeSwitchPhiConstants(SwitchInst *SI);
     bool optimizeSwitchInst(SwitchInst *SI);
     bool optimizeExtractElementInst(Instruction *Inst);
     bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
@@ -6979,7 +6981,7 @@
   return Changed;
 }
 
-bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
   Value *Cond = SI->getCondition();
   Type *OldType = Cond->getType();
   LLVMContext &Context = Cond->getContext();
@@ -7028,6 +7030,71 @@
 
   return true;
 }
 
+/// Replace PHI operands in case successors of \p SI that are equal to the
+/// case value with the switch condition.
+///
+/// \returns true if at least one PHI operand was rewritten.
+bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
+  // The SCCP optimization tends to produce code like this:
+  //   switch(x) { case 42: phi(42, ...) }
+  // Materializing the constant for the phi-argument needs instructions; so we
+  // change the code to:
+  //   switch(x) { case 42: phi(x, ...) }
+
+  Value *Condition = SI->getCondition();
+  // Avoid an endless loop in the degenerate case of a constant condition:
+  // replacing a constant with the very same constant would report a change
+  // on every invocation without making progress.
+  if (isa<ConstantInt>(Condition))
+    return false;
+
+  bool Changed = false;
+  BasicBlock *SwitchBB = SI->getParent();
+  Type *ConditionType = Condition->getType();
+
+  for (const SwitchInst::CaseHandle &Case : SI->cases()) {
+    ConstantInt *CaseValue = Case.getCaseValue();
+    BasicBlock *CaseBB = Case.getCaseSuccessor();
+    // Set to true if we previously checked that `CaseBB` is only reached by
+    // a single case from this switch.
+    bool CheckedForSinglePred = false;
+    for (PHINode &PHI : CaseBB->phis()) {
+      Type *PHIType = PHI.getType();
+      if (PHIType == ConditionType) {
+        // Set to true to skip this case because of multiple preds.
+        bool SkipCase = false;
+        for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
+          if (PHI.getIncomingValue(I) != CaseValue ||
+              PHI.getIncomingBlock(I) != SwitchBB)
+            continue;
+          // We cannot optimize if there are multiple case labels jumping to
+          // this block. This check may get expensive when there are many
+          // case labels so we test for it last.
+          if (!CheckedForSinglePred) {
+            CheckedForSinglePred = true;
+            if (SI->findCaseDest(CaseBB) == nullptr) {
+              SkipCase = true;
+              break;
+            }
+          }
+
+          PHI.setIncomingValue(I, Condition);
+          Changed = true;
+        }
+        if (SkipCase)
+          break;
+      }
+    }
+  }
+  return Changed;
+}
+
+/// Apply the switch type and switch PHI-constant optimizations to \p SI.
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+  bool Changed = optimizeSwitchType(SI);
+  Changed |= optimizeSwitchPhiConstants(SI);
+  return Changed;
+}
 
 namespace {
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -46,11 +46,11 @@
 ; CHECK-NEXT: ## %bb.2: ## %if.then4
 ; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB0_55
+; CHECK-NEXT: je LBB0_54
 ; CHECK-NEXT: ## %bb.3: ## %SyTime.exit
 ; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB0_55
+; CHECK-NEXT: je LBB0_54
 ; CHECK-NEXT: LBB0_4: ## %cleanup
 ; CHECK-NEXT: addq $552, %rsp ## imm = 0x228
 ; CHECK-NEXT: popq %rbx
@@ -63,7 +63,7 @@
 ; CHECK-NEXT: LBB0_5: ## %if.end25
 ; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB0_55
+; CHECK-NEXT: je LBB0_54
 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720
 ; CHECK-NEXT: movq %rdx, %rbx
 ; CHECK-NEXT: movq %rdi, %rbp
@@ -76,6 +76,7 @@
 ; CHECK-NEXT: movl $32, %esi
 ; CHECK-NEXT: callq _memset
 ; CHECK-NEXT: LBB0_8: ## %while.body.preheader
+; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410
 ; CHECK-NEXT: movq _syBuf@GOTPCREL(%rip), 
%rcx ; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx @@ -90,232 +91,223 @@ ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: xorl %r13d, %r13d -; CHECK-NEXT: testb %r13b, %r13b +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: testb %r14b, %r14b ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx ; CHECK-NEXT: leaq LJTI0_1(%rip), %rbx ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: xorl %r12d, %r12d ; CHECK-NEXT: jmp LBB0_13 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_20: ## %sw.bb256 +; CHECK-NEXT: LBB0_43: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r13d, %r14d -; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge +; CHECK-NEXT: testb %r13b, %r13b +; CHECK-NEXT: je LBB0_54 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_20: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: jle LBB0_22 +; CHECK-NEXT: movl %r14d, %r12d +; CHECK-NEXT: jle LBB0_21 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 -; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r13), %eax +; CHECK-NEXT: ## Child Loop BB0_28 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_37 Depth 2 +; CHECK-NEXT: leal -268(%r14), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 -; CHECK-NEXT: ## %bb.56: ## %while.body200 +; CHECK-NEXT: ## %bb.55: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movslq (%rbx,%rax,4), %rax ; CHECK-NEXT: addq %rbx, %rax 
; CHECK-NEXT: jmpq *%rax -; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %r12b, %r12b -; CHECK-NEXT: movl %r13d, %r14d -; CHECK-NEXT: jne LBB0_21 -; CHECK-NEXT: jmp LBB0_55 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_14: ## %while.body200 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%r13), %eax -; CHECK-NEXT: cmpl $21, %eax -; CHECK-NEXT: ja LBB0_20 -; CHECK-NEXT: ## %bb.15: ## %while.body200 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r14d -; CHECK-NEXT: movslq (%rdx,%rax,4), %rax -; CHECK-NEXT: addq %rdx, %rax -; CHECK-NEXT: jmpq *%rax -; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader +; CHECK-NEXT: LBB0_25: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r14d -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_26: ## %sw.bb474 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: ## implicit-def: $rbp -; CHECK-NEXT: jne LBB0_34 -; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader +; CHECK-NEXT: jne LBB0_33 +; CHECK-NEXT: ## %bb.26: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: ## implicit-def: $rbp -; CHECK-NEXT: jne LBB0_34 -; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader +; CHECK-NEXT: jne LBB0_33 +; CHECK-NEXT: ## %bb.27: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: ## implicit-def: $rax -; CHECK-NEXT: jmp LBB0_29 +; CHECK-NEXT: jmp LBB0_28 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_32: ## %do.body479.backedge -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: LBB0_31: ## %do.body479.backedge +; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2 ; CHECK-NEXT: leaq 1(%rbp), %rax -; CHECK-NEXT: testb %r12b, %r12b -; CHECK-NEXT: je LBB0_33 -; 
CHECK-NEXT: LBB0_29: ## %land.rhs485 +; CHECK-NEXT: testb %r13b, %r13b +; CHECK-NEXT: je LBB0_32 +; CHECK-NEXT: LBB0_28: ## %land.rhs485 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: js LBB0_55 -; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: js LBB0_54 +; CHECK-NEXT: ## %bb.29: ## %cond.true.i.i2780 +; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2 ; CHECK-NEXT: movq %rax, %rbp -; CHECK-NEXT: testb %r12b, %r12b -; CHECK-NEXT: jne LBB0_32 -; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: testb %r13b, %r13b +; CHECK-NEXT: jne LBB0_31 +; CHECK-NEXT: ## %bb.30: ## %lor.rhs500 +; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: testb %r12b, %r12b -; CHECK-NEXT: jne LBB0_32 -; CHECK-NEXT: jmp LBB0_34 -; CHECK-NEXT: LBB0_45: ## %sw.bb1134 +; CHECK-NEXT: testb %r13b, %r13b +; CHECK-NEXT: jne LBB0_31 +; CHECK-NEXT: jmp LBB0_33 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_14: ## %while.body200 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: leal 1(%r14), %eax +; CHECK-NEXT: cmpl $21, %eax +; CHECK-NEXT: ja LBB0_20 +; CHECK-NEXT: ## %bb.15: ## %while.body200 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movslq (%rdx,%rax,4), %rax +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: jmpq *%rax +; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl $1, %r14d +; CHECK-NEXT: jmp LBB0_20 +; CHECK-NEXT: LBB0_44: ## %sw.bb1134 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx -; CHECK-NEXT: jb LBB0_55 -; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: jb 
LBB0_54 +; CHECK-NEXT: ## %bb.45: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: movl $268, %r14d ## imm = 0x10C -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: jmp LBB0_20 +; CHECK-NEXT: LBB0_39: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $20, %r14d -; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: jmp LBB0_20 ; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $2, %r14d -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit +; CHECK-NEXT: jmp LBB0_20 +; CHECK-NEXT: LBB0_32: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: incq %rbp -; CHECK-NEXT: LBB0_34: ## %if.end517 +; CHECK-NEXT: LBB0_33: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r14), %eax +; CHECK-NEXT: leal -324(%r12), %eax ; CHECK-NEXT: cmpl $59, %eax -; CHECK-NEXT: ja LBB0_35 -; CHECK-NEXT: ## %bb.57: ## %if.end517 +; CHECK-NEXT: ja LBB0_34 +; CHECK-NEXT: ## %bb.56: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movabsq $576460756598390785, %rcx ## imm = 0x800000100000001 ; CHECK-NEXT: btq %rax, %rcx -; CHECK-NEXT: jb LBB0_38 -; CHECK-NEXT: LBB0_35: ## %if.end517 +; CHECK-NEXT: jb LBB0_37 +; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r14d -; CHECK-NEXT: je LBB0_38 -; CHECK-NEXT: ## %bb.36: ## %if.end517 +; CHECK-NEXT: cmpl $11, %r12d +; CHECK-NEXT: je LBB0_37 +; CHECK-NEXT: ## %bb.35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r14d -; CHECK-NEXT: je LBB0_38 -; CHECK-NEXT: ## %bb.37: ## %if.then532 +; CHECK-NEXT: cmpl $24, %r12d +; CHECK-NEXT: je LBB0_37 +; CHECK-NEXT: ## %bb.36: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movq 
_SyFgets.yank@GOTPCREL(%rip), %rax ; CHECK-NEXT: movb $0, (%rax) ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_38: ## %for.cond534 +; CHECK-NEXT: LBB0_37: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %r12b, %r12b -; CHECK-NEXT: jne LBB0_38 -; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader +; CHECK-NEXT: testb %r13b, %r13b +; CHECK-NEXT: jne LBB0_37 +; CHECK-NEXT: ## %bb.38: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %r12b, %r12b +; CHECK-NEXT: testb %r13b, %r13b ; CHECK-NEXT: movb $0, (%rbp) -; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx -; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: jmp LBB0_20 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_42: ## %while.cond864 +; CHECK-NEXT: LBB0_41: ## %while.cond864 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_42 +; CHECK-NEXT: jmp LBB0_41 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_43: ## %while.cond962 +; CHECK-NEXT: LBB0_42: ## %while.cond962 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_43 +; CHECK-NEXT: jmp LBB0_42 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_25: ## %for.cond357 +; CHECK-NEXT: LBB0_24: ## %for.cond357 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_25 +; CHECK-NEXT: jmp LBB0_24 ; CHECK-NEXT: LBB0_11: ; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: LBB0_22: ## %while.end1465 +; CHECK-NEXT: LBB0_21: ## %while.end1465 ; CHECK-NEXT: incl %r14d ; CHECK-NEXT: cmpl $16, %r14d -; CHECK-NEXT: ja LBB0_50 -; CHECK-NEXT: ## %bb.23: ## %while.end1465 +; CHECK-NEXT: ja LBB0_49 +; CHECK-NEXT: ## %bb.22: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 ; CHECK-NEXT: btl %r14d, %eax -; CHECK-NEXT: jae LBB0_50 -; CHECK-NEXT: ## %bb.24: +; CHECK-NEXT: jae LBB0_49 +; 
CHECK-NEXT: ## %bb.23: ; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload -; CHECK-NEXT: LBB0_48: ## %if.then1477 +; CHECK-NEXT: LBB0_47: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write ; CHECK-NEXT: subq %rbp, %rbx ; CHECK-NEXT: movq _syHistory@GOTPCREL(%rip), %rax ; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_49: ## %for.body1723 +; CHECK-NEXT: LBB0_48: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax -; CHECK-NEXT: jmp LBB0_49 -; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit +; CHECK-NEXT: jmp LBB0_48 +; CHECK-NEXT: LBB0_46: ## %if.then1477.loopexit ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ; CHECK-NEXT: movq %rbx, %rbp -; CHECK-NEXT: jmp LBB0_48 +; CHECK-NEXT: jmp LBB0_47 ; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_41 +; CHECK-NEXT: je LBB0_40 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_17: ## %for.body643.us ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_17 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_41: ## %while.cond661 +; CHECK-NEXT: LBB0_40: ## %while.cond661 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_41 -; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader +; CHECK-NEXT: jmp LBB0_40 +; CHECK-NEXT: LBB0_49: ## %for.cond1480.preheader ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 ; CHECK-NEXT: cmpq %rax, %rax -; CHECK-NEXT: jae LBB0_55 -; CHECK-NEXT: ## %bb.51: ## %for.body1664.lr.ph +; CHECK-NEXT: jae LBB0_54 +; CHECK-NEXT: ## %bb.50: ## %for.body1664.lr.ph ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload -; CHECK-NEXT: jne LBB0_54 -; CHECK-NEXT: ## %bb.52: ## 
%while.body1679.preheader +; CHECK-NEXT: jne LBB0_53 +; CHECK-NEXT: ## %bb.51: ## %while.body1679.preheader ; CHECK-NEXT: incl %ebp ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_53: ## %while.body1679 +; CHECK-NEXT: LBB0_52: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movq (%rbx), %rdi ; CHECK-NEXT: callq _fileno ; CHECK-NEXT: movslq %ebp, %rax ; CHECK-NEXT: leal 1(%rax), %ebp ; CHECK-NEXT: cmpq %rax, %rax -; CHECK-NEXT: jl LBB0_53 -; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader +; CHECK-NEXT: jl LBB0_52 +; CHECK-NEXT: LBB0_53: ## %while.cond1683.preheader ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: LBB0_55: ## %if.then.i +; CHECK-NEXT: LBB0_54: ## %if.then.i ; CHECK-NEXT: ud2 entry: %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64 diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll --- a/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll +++ b/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll @@ -721,16 +721,17 @@ ; X64-NEXT: jmpq *%rsi ; X64-NEXT: .LBB7_2: # %bb0 ; X64-NEXT: cmovbeq %r10, %r9 -; X64-NEXT: movl (%rsi), %eax -; X64-NEXT: orl %r9d, %eax +; X64-NEXT: movl (%rsi), %edi +; X64-NEXT: orl %r9d, %edi ; X64-NEXT: movq $.LBB7_3, %rsi ; X64-NEXT: .LBB7_3: # Block address taken ; X64-NEXT: # %bb1 ; X64-NEXT: cmpq $.LBB7_3, %rsi ; X64-NEXT: cmovneq %r10, %r9 -; X64-NEXT: addl (%rdx), %eax -; X64-NEXT: orl %r9d, %eax +; X64-NEXT: addl (%rdx), %edi +; X64-NEXT: orl %r9d, %edi ; X64-NEXT: movq $.LBB7_4, %rsi +; X64-NEXT: movl %edi, %eax ; X64-NEXT: .LBB7_4: # Block address taken ; X64-NEXT: # %bb2 ; X64-NEXT: cmpq $.LBB7_4, %rsi @@ -764,24 +765,25 @@ ; X64-PIC-NEXT: cmovaq %r10, %r9 ; X64-PIC-NEXT: xorl %eax, %eax ; X64-PIC-NEXT: movl %edi, %esi -; X64-PIC-NEXT: leaq .LJTI7_0(%rip), %rdi -; X64-PIC-NEXT: movslq (%rdi,%rsi,4), %rsi -; X64-PIC-NEXT: addq %rdi, %rsi +; X64-PIC-NEXT: 
leaq .LJTI7_0(%rip), %r11 +; X64-PIC-NEXT: movslq (%r11,%rsi,4), %rsi +; X64-PIC-NEXT: addq %r11, %rsi ; X64-PIC-NEXT: orq %r9, %rsi ; X64-PIC-NEXT: jmpq *%rsi ; X64-PIC-NEXT: .LBB7_2: # %bb0 ; X64-PIC-NEXT: cmovbeq %r10, %r9 -; X64-PIC-NEXT: movl (%rsi), %eax -; X64-PIC-NEXT: orl %r9d, %eax +; X64-PIC-NEXT: movl (%rsi), %edi +; X64-PIC-NEXT: orl %r9d, %edi ; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rsi ; X64-PIC-NEXT: .LBB7_3: # Block address taken ; X64-PIC-NEXT: # %bb1 -; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rdi -; X64-PIC-NEXT: cmpq %rdi, %rsi +; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rax +; X64-PIC-NEXT: cmpq %rax, %rsi ; X64-PIC-NEXT: cmovneq %r10, %r9 -; X64-PIC-NEXT: addl (%rdx), %eax -; X64-PIC-NEXT: orl %r9d, %eax +; X64-PIC-NEXT: addl (%rdx), %edi +; X64-PIC-NEXT: orl %r9d, %edi ; X64-PIC-NEXT: leaq .LBB7_4(%rip), %rsi +; X64-PIC-NEXT: movl %edi, %eax ; X64-PIC-NEXT: .LBB7_4: # Block address taken ; X64-PIC-NEXT: # %bb2 ; X64-PIC-NEXT: leaq .LBB7_4(%rip), %rdx @@ -839,14 +841,15 @@ ; X64-RETPOLINE-NEXT: jmp .LBB8_12 ; X64-RETPOLINE-NEXT: .LBB8_8: ; X64-RETPOLINE-NEXT: cmoveq %r10, %r9 -; X64-RETPOLINE-NEXT: movl (%rsi), %eax -; X64-RETPOLINE-NEXT: orl %r9d, %eax +; X64-RETPOLINE-NEXT: movl (%rsi), %edi +; X64-RETPOLINE-NEXT: orl %r9d, %edi ; X64-RETPOLINE-NEXT: jmp .LBB8_9 ; X64-RETPOLINE-NEXT: .LBB8_2: ; X64-RETPOLINE-NEXT: cmovneq %r10, %r9 ; X64-RETPOLINE-NEXT: .LBB8_9: # %bb1 -; X64-RETPOLINE-NEXT: addl (%rdx), %eax -; X64-RETPOLINE-NEXT: orl %r9d, %eax +; X64-RETPOLINE-NEXT: addl (%rdx), %edi +; X64-RETPOLINE-NEXT: orl %r9d, %edi +; X64-RETPOLINE-NEXT: movl %edi, %eax ; X64-RETPOLINE-NEXT: .LBB8_10: # %bb2 ; X64-RETPOLINE-NEXT: addl (%rcx), %eax ; X64-RETPOLINE-NEXT: orl %r9d, %eax diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll --- a/llvm/test/CodeGen/X86/switch-phi-const.ll +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -7,35 +7,29 @@ ; CHECK-LABEL: switch_phi_const: ; CHECK: # %bb.0: # %bb0 ; CHECK-NEXT: 
# kill: def $edi killed $edi def $rdi -; CHECK-NEXT: decl %edi -; CHECK-NEXT: cmpl $54, %edi +; CHECK-NEXT: leal -1(%rdi), %ecx +; CHECK-NEXT: cmpl $54, %ecx ; CHECK-NEXT: ja .LBB0_8 ; CHECK-NEXT: # %bb.1: # %bb0 ; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: movl $13, %edx -; CHECK-NEXT: movl $5, %esi -; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: jmpq *.LJTI0_0(,%rdi,8) +; CHECK-NEXT: jmpq *.LJTI0_0(,%rcx,8) ; CHECK-NEXT: .LBB0_2: # %case_7 ; CHECK-NEXT: movq g@GOTPCREL(%rip), %rax -; CHECK-NEXT: movl (%rax), %ecx +; CHECK-NEXT: movl (%rax), %edi ; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax ; CHECK-NEXT: movl $7, (%rax) ; CHECK-NEXT: .LBB0_3: # %case_1_loop ; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax ; CHECK-NEXT: movl $1, (%rax) -; CHECK-NEXT: movl %ecx, %esi ; CHECK-NEXT: .LBB0_4: # %case_5 ; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax ; CHECK-NEXT: movl $5, (%rax) -; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: .LBB0_5: # %case_13 ; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax ; CHECK-NEXT: movl $13, (%rax) -; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: .LBB0_6: # %case_42 -; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rcx -; CHECK-NEXT: movl %eax, (%rcx) +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl %edi, (%rax) ; CHECK-NEXT: movl $55, %eax ; CHECK-NEXT: .LBB0_7: # %case_55 ; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rcx diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/switch-phi-const.ll b/llvm/test/Transforms/CodeGenPrepare/X86/switch-phi-const.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/switch-phi-const.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -mtriple=x86_64-- -codegenprepare -S | FileCheck %s +@g = global i32 0 +@effect = global i32 0 + +define void @switch_phi_const(i32 %x) { +; CHECK-LABEL: @switch_phi_const( +; CHECK-NEXT: bb0: +; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [ +; CHECK-NEXT: i32 13, label 
[[CASE_13:%.*]] +; CHECK-NEXT: i32 42, label [[CASE_42:%.*]] +; CHECK-NEXT: i32 50, label [[CASE_50_51:%.*]] +; CHECK-NEXT: i32 51, label [[CASE_50_51]] +; CHECK-NEXT: i32 55, label [[CASE_55:%.*]] +; CHECK-NEXT: i32 7, label [[CASE_7:%.*]] +; CHECK-NEXT: ] +; CHECK: case_13: +; CHECK-NEXT: [[X0:%.*]] = phi i32 [ [[X]], [[BB0:%.*]] ], [ [[X_LOOPBACK:%.*]], [[CASE_7]] ] +; CHECK-NEXT: store i32 13, i32* @effect, align 4 +; CHECK-NEXT: br label [[CASE_42]] +; CHECK: case_42: +; CHECK-NEXT: [[X1:%.*]] = phi i32 [ [[X]], [[BB0]] ], [ [[X0]], [[CASE_13]] ] +; CHECK-NEXT: store i32 [[X1]], i32* @effect, align 4 +; CHECK-NEXT: br label [[CASE_50_51]] +; CHECK: case_50_51: +; CHECK-NEXT: [[X2:%.*]] = phi i32 [ 50, [[BB0]] ], [ 50, [[BB0]] ], [ [[X1]], [[CASE_42]] ] +; CHECK-NEXT: [[X2_2:%.*]] = phi i32 [ 51, [[BB0]] ], [ 51, [[BB0]] ], [ [[X1]], [[CASE_42]] ] +; CHECK-NEXT: store i32 [[X2]], i32* @effect, align 4 +; CHECK-NEXT: store i32 [[X2_2]], i32* @effect, align 4 +; CHECK-NEXT: br label [[CASE_55]] +; CHECK: case_55: +; CHECK-NEXT: [[X3:%.*]] = phi i32 [ 42, [[BB0]] ], [ 55, [[CASE_50_51]] ] +; CHECK-NEXT: store i32 [[X3]], i32* @effect, align 4 +; CHECK-NEXT: br label [[DEFAULT]] +; CHECK: case_7: +; CHECK-NEXT: [[X_LOOPBACK]] = load i32, i32* @g, align 4 +; CHECK-NEXT: store i32 7, i32* @effect, align 4 +; CHECK-NEXT: br label [[CASE_13]] +; CHECK: default: +; CHECK-NEXT: ret void +; +bb0: + switch i32 %x, label %default [ + i32 13, label %case_13 + i32 42, label %case_42 + i32 50, label %case_50_51 + i32 51, label %case_50_51 + i32 55, label %case_55 + i32 7, label %case_7 + ] + +case_13: + ; We should replace 13 with %x + %x0 = phi i32 [ 13, %bb0 ], [ %x_loopback, %case_7 ] + store i32 13, i32* @effect, align 4 + br label %case_42 + +case_42: + ; We should replace 42 with %x + %x1 = phi i32 [ 42, %bb0 ], [ %x0, %case_13 ] + store i32 %x1, i32* @effect, align 4 + br label %case_50_51 + +case_50_51: + ; Must not replace the PHI argument: Case values 50 and 51 jump 
here. + %x2 = phi i32 [ 50, %bb0 ], [ 50, %bb0 ], [ %x1, %case_42 ] + %x2.2 = phi i32 [ 51, %bb0 ], [ 51, %bb0 ], [ %x1, %case_42 ] + store i32 %x2, i32* @effect, align 4 + store i32 %x2.2, i32* @effect, align 4 + br label %case_55 + +case_55: + ; We must not replace any of the PHI arguments: + ; - 42 is the wrong constant + ; - %case_50_51 is not the switch predecessor block. + %x3 = phi i32 [ 42, %bb0 ], [ 55, %case_50_51 ] + store i32 %x3, i32* @effect, align 4 + br label %default + +case_7: + %x_loopback = load i32, i32* @g, align 4 + store i32 7, i32* @effect, align 4 + br label %case_13 + +default: + ret void +} + +define void @switch_phi_const_multiple_phis(i32 %x) { +; CHECK-LABEL: @switch_phi_const_multiple_phis( +; CHECK-NEXT: bb0: +; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[CASE_13:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[X:%.*]], 13 +; CHECK-NEXT: br i1 [[COND]], label [[CASE_13]], label [[DEFAULT:%.*]] +; CHECK: case_13: +; CHECK-NEXT: [[X0:%.*]] = phi i32 [ [[X]], [[BB1]] ], [ 1, [[BB0:%.*]] ] +; CHECK-NEXT: [[N0:%.*]] = phi i32 [ 14, [[BB1]] ], [ 1, [[BB0]] ] +; CHECK-NEXT: [[X1:%.*]] = phi i32 [ 27, [[BB0]] ], [ [[X]], [[BB1]] ] +; CHECK-NEXT: store volatile i32 [[X0]], i32* @effect, align 4 +; CHECK-NEXT: store volatile i32 [[N0]], i32* @effect, align 4 +; CHECK-NEXT: store volatile i32 [[X1]], i32* @effect, align 4 +; CHECK-NEXT: ret void +; CHECK: default: +; CHECK-NEXT: ret void +; +bb0: + br i1 undef, label %bb1, label %case_13 + +bb1: + switch i32 %x, label %default [ + i32 13, label %case_13 + ] + +case_13: + ; Check that replacement works for multiple PHIs. + ; Should perform replacement for %x0, %x1 but not %n0 + %x0 = phi i32 [13, %bb1], [1, %bb0] + %n0 = phi i32 [14, %bb1], [1, %bb0] + %x1 = phi i32 [27, %bb0], [13, %bb1] + store volatile i32 %x0, i32* @effect, align 4 + store volatile i32 %n0, i32* @effect, align 4 + store volatile i32 %x1, i32* @effect, align 4 + ret void + +default: + ret void +}