Index: llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/branch-fold-multiple.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; REQUIRES: amdgpu-registered-target +; RUN: opt %s -S -o - -simplifycfg | FileCheck %s + +target triple = "amdgcn-amd-amdhsa" + +%struct.S = type { [4 x i32] } + +; Check the second, third, and fourth basic blocks are folded into +; the first basic block since each has one bonus instruction, which +; is below the default bonus instruction threshold 2. + +define zeroext i1 @test1(%struct.S* nocapture noundef nonnull readonly align 4 dereferenceable(16) %this) unnamed_addr #0 align 2 { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS:%.*]], i64 0, i32 0, i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP0]], 0 +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[THIS]], i64 0, i32 0, i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[TMP1]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]] +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[THIS]], i64 0, i32 0, i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[TMP2]], 0 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]] +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[THIS]], i64 0, i32 0, i64 3 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[TMP3]], 0 +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 
[[OR_COND1]], i1 true, i1 [[CMP2_3]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true +; CHECK-NEXT: ret i1 [[SPEC_SELECT]] +; +entry: + %arrayidx = getelementptr inbounds %struct.S, %struct.S* %this, i64 0, i32 0, i64 0 + %0 = load i32, i32* %arrayidx, align 4 + %cmp2 = icmp sgt i32 %0, 0 + br i1 %cmp2, label %cleanup, label %for.cond + +for.cond: + %arrayidx.1 = getelementptr inbounds %struct.S, %struct.S* %this, i64 0, i32 0, i64 1 + %1 = load i32, i32* %arrayidx.1, align 4 + %cmp2.1 = icmp sgt i32 %1, 0 + br i1 %cmp2.1, label %cleanup, label %for.cond.1 + +for.cond.1: + %arrayidx.2 = getelementptr inbounds %struct.S, %struct.S* %this, i64 0, i32 0, i64 2 + %2 = load i32, i32* %arrayidx.2, align 4 + %cmp2.2 = icmp sgt i32 %2, 0 + br i1 %cmp2.2, label %cleanup, label %for.cond.2 + +for.cond.2: + %arrayidx.3 = getelementptr inbounds %struct.S, %struct.S* %this, i64 0, i32 0, i64 3 + %3 = load i32, i32* %arrayidx.3, align 4 + %cmp2.3 = icmp sgt i32 %3, 0 + br i1 %cmp2.3, label %cleanup, label %for.cond.3 + +for.cond.3: + br label %cleanup + +cleanup: + %cmp = phi i1 [ false, %entry ], [ false, %for.cond ], [ false, %for.cond.1 ], [ false, %for.cond.2 ], [ true, %for.cond.3 ] + ret i1 %cmp +} + +; Check the second, third, and fourth basic blocks are folded into the first +; basic block since each has no bonus instruction. 
+ +define zeroext i1 @test2(i32 %0, i32 %1, i32 %2, i32 %3) unnamed_addr #0 align 2 { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP2_1]] +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_2]] +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[TMP3:%.*]], 0 +; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP2_3]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 true +; CHECK-NEXT: ret i1 [[SPEC_SELECT]] +; +entry: + %cmp2 = icmp sgt i32 %0, 0 + br i1 %cmp2, label %cleanup, label %for.cond + +for.cond: + %cmp2.1 = icmp sgt i32 %1, 0 + br i1 %cmp2.1, label %cleanup, label %for.cond.1 + +for.cond.1: + %cmp2.2 = icmp sgt i32 %2, 0 + br i1 %cmp2.2, label %cleanup, label %for.cond.2 + +for.cond.2: + %cmp2.3 = icmp sgt i32 %3, 0 + br i1 %cmp2.3, label %cleanup, label %for.cond.3 + +for.cond.3: + br label %cleanup + +cleanup: + %cmp = phi i1 [ false, %entry ], [ false, %for.cond ], [ false, %for.cond.1 ], [ false, %for.cond.2 ], [ true, %for.cond.3 ] + ret i1 %cmp +} + +; Check the second basic block is not folded into the first basic block +; since it has three bonus instructions, which exceeds the default bonus +; instruction threshold 2. The third and fourth basic blocks are folded +; into the second basic block since they have no bonus instructions. 
+ +define zeroext i1 @test3(i32 %0, i32 %1, i32 %2, i32 %3) unnamed_addr #0 align 2 { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP0:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP2]], label [[CLEANUP:%.*]], label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[TMP0]], [[TMP1:%.*]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], [[TMP2:%.*]] +; CHECK-NEXT: [[MUL3:%.*]] = mul i32 [[MUL2]], [[TMP3:%.*]] +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp sgt i32 [[MUL3]], 0 +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp sgt i32 [[TMP2]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP2_1]], i1 true, i1 [[CMP2_2]] +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp sgt i32 [[TMP3]], 0 +; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP2_3]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND1]], i1 false, i1 true +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[SPEC_SELECT]], [[FOR_COND]] ] +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %cmp2 = icmp sgt i32 %0, 0 + br i1 %cmp2, label %cleanup, label %for.cond + +for.cond: ; bonus instructions: %mul1, %mul2, %mul3 + %mul1 = mul i32 %0, %1 + %mul2 = mul i32 %mul1, %2 + %mul3 = mul i32 %mul2, %3 + %cmp2.1 = icmp sgt i32 %mul3, 0 + br i1 %cmp2.1, label %cleanup, label %for.cond.1 + +for.cond.1: + %cmp2.2 = icmp sgt i32 %2, 0 + br i1 %cmp2.2, label %cleanup, label %for.cond.2 + +for.cond.2: + %cmp2.3 = icmp sgt i32 %3, 0 + br i1 %cmp2.3, label %cleanup, label %for.cond.3 + +for.cond.3: + br label %cleanup + +cleanup: + %cmp = phi i1 [ false, %entry ], [ false, %for.cond ], [ false, %for.cond.1 ], [ false, %for.cond.2 ], [ true, %for.cond.3 ] + ret i1 %cmp +} + +attributes #0 = { "target-cpu"="gfx906"}