diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3226,12 +3226,15 @@ // Don't check the branch condition comparison itself. if (&I == Cond) continue; - // Ignore dbg intrinsics, and the terminator. - if (isa(I) || isa(I)) + // Ignore the terminator. + if (isa(I)) continue; // I must be safe to execute unconditionally. if (!isSafeToSpeculativelyExecute(&I)) return false; + // Ignore free instructions. + if (TTI && TTI->getUserCost(&I, CostKind) == TargetTransformInfo::TCC_Free) + continue; // Account for the cost of duplicating this instruction into each // predecessor. diff --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll --- a/llvm/test/CodeGen/AArch64/csr-split.ll +++ b/llvm/test/CodeGen/AArch64/csr-split.ll @@ -82,22 +82,22 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: cbz x0, .LBB1_2 -; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: cbz x0, .LBB1_3 +; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: adrp x8, a ; CHECK-NEXT: ldrsw x8, [x8, :lo12:a] ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: b.eq .LBB1_3 -; CHECK-NEXT: .LBB1_2: // %return -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_3: // %if.then2 +; CHECK-NEXT: b.ne .LBB1_3 +; CHECK-NEXT: // %bb.2: // %if.then2 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: b callNonVoid +; CHECK-NEXT: .LBB1_3: // %return +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret ; ; CHECK-APPLE-LABEL: test2: ; CHECK-APPLE: ; %bb.0: ; %entry @@ -108,26 +108,26 @@ ; CHECK-APPLE-NEXT: .cfi_offset w29, -16 ; CHECK-APPLE-NEXT: .cfi_offset w19, -24 ; CHECK-APPLE-NEXT: .cfi_offset w20, -32 -; CHECK-APPLE-NEXT: cbz x0, LBB1_2 -; CHECK-APPLE-NEXT: ; %bb.1: ; %if.end +; CHECK-APPLE-NEXT: cbz x0, LBB1_3 +; CHECK-APPLE-NEXT: ; %bb.1: ; %entry ; CHECK-APPLE-NEXT: Lloh2: ; CHECK-APPLE-NEXT: adrp x8, _a@PAGE ; CHECK-APPLE-NEXT: Lloh3: ; CHECK-APPLE-NEXT: ldrsw x8, [x8, _a@PAGEOFF] ; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: cmp x8, x0 -; CHECK-APPLE-NEXT: b.eq LBB1_3 -; CHECK-APPLE-NEXT: LBB1_2: ; %return -; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: mov w0, wzr -; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: ret -; CHECK-APPLE-NEXT: LBB1_3: ; %if.then2 +; CHECK-APPLE-NEXT: b.ne LBB1_3 +; CHECK-APPLE-NEXT: ; %bb.2: ; %if.then2 ; CHECK-APPLE-NEXT: bl _callVoid ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: mov x0, x19 ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: b _callNonVoid +; CHECK-APPLE-NEXT: LBB1_3: ; %return +; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: mov w0, wzr +; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: ret ; CHECK-APPLE-NEXT: .loh AdrpLdr Lloh2, Lloh3 entry: %tobool = icmp eq i32* %p1, null diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -90,24 +90,31 @@ define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0 -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 -; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]] -; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]] -; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]] -; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP4]] +; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 +; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP9]] +; CHECK-NEXT: br i1 [[OR_COND2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE11:%.*]] +; CHECK: lor.lhs.false11: +; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 +; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 +; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP14]], i1 true, i1 [[CMP19]] +; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP24]] +; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP29]] +; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP34]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND5]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[LOR_LHS_FALSE11]] ] ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: @@ -261,24 +268,31 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_separate_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0 -; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]] -; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 -; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP18]] -; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP23]] -; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP28]] -; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP33]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 +; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 +; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP4]] +; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 +; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP9]] +; CHECK-NEXT: br i1 [[OR_COND2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE11:%.*]] +; CHECK: lor.lhs.false11: +; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 +; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 +; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP14]], i1 true, i1 [[CMP18]] +; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP23]] +; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP28]] +; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP33]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND5]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[LOR_LHS_FALSE11]] ] ; CHECK-NEXT: ret float [[RETVAL_0]] ; entry: diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-free-cost.ll @@ -8,12 +8,11 @@ define void @f(i8* %a, i8* %b, i1 %c, i1 %d, i1 %e) { ; CHECK-LABEL: @f( -; CHECK-NEXT: br i1 [[C:%.*]], label [[L1:%.*]], label [[L3:%.*]] -; CHECK: l1: ; CHECK-NEXT: [[A1:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[A:%.*]]) ; CHECK-NEXT: [[B1:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[B:%.*]]) ; CHECK-NEXT: [[I:%.*]] = icmp eq i8* [[A1]], [[B1]] -; CHECK-NEXT: br i1 [[I]], label [[L2:%.*]], label [[L3]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C:%.*]], i1 [[I]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label [[L2:%.*]], label [[L3:%.*]] ; CHECK: l2: ; CHECK-NEXT: call void @g1() ; CHECK-NEXT: br label [[RET:%.*]]