diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
@@ -0,0 +1,113 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32
+; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64
+
+; Test case:
+; - Addition should be cheaper than multiplication
+
+; The following LLVM IR simulates:
+;   int8_t flags2[8193];
+;   void test(int i) {
+;     int tmp = i * 2;
+;     if (i * 2 > 8192) return;
+;     for (int j = 0; ; ++j) {
+;       int offset = j * i + tmp;
+;       flags2[offset] = 0;
+;       if (offset + i > 8192) break;
+;     }
+;   }
+
+; After LSR:
+;   int8_t flags2[8193];
+;   void test(int i) {
+;     int j = i * 2;
+;     if (j > 8192) return;
+;     do {
+;       flags2[j] = 0;
+;       j += i;
+;     } while (j < 8193);
+;   }
+
+@flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1]
+
+define void @test(i32 signext %i) nounwind {
+; RV32-LABEL: test:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    slli a1, a0, 1
+; RV32-NEXT:    lui a3, 2
+; RV32-NEXT:    blt a3, a1, .LBB0_3
+; RV32-NEXT:  # %bb.1: # %bb.preheader
+; RV32-NEXT:    lui a2, %hi(flags2)
+; RV32-NEXT:    addi a2, a2, %lo(flags2)
+; RV32-NEXT:    addi a3, a3, 1
+; RV32-NEXT:  .LBB0_2: # %bb
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    add a4, a1, a2
+; RV32-NEXT:    add a1, a1, a0
+; RV32-NEXT:    sb zero, 0(a4)
+; RV32-NEXT:    blt a1, a3, .LBB0_2
+; RV32-NEXT:  .LBB0_3: # %return
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -48
+; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT:    mv s0, a0
+; RV64-NEXT:    slliw s2, a0, 1
+; RV64-NEXT:    lui a0, 2
+; RV64-NEXT:    blt a0, s2, .LBB0_3
+; RV64-NEXT:  # %bb.1: # %bb.preheader
+; RV64-NEXT:    li s1, 0
+; RV64-NEXT:    lui a1, %hi(flags2)
+; RV64-NEXT:    addi s3, a1, %lo(flags2)
+; RV64-NEXT:    addiw s4, a0, 1
+; RV64-NEXT:  .LBB0_2: # %bb
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    mv a0, s1
+; RV64-NEXT:    mv a1, s0
+; RV64-NEXT:    call __muldi3@plt
+; RV64-NEXT:    addw a0, a0, s2
+; RV64-NEXT:    slli a1, a0, 32
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    add a1, a1, s3
+; RV64-NEXT:    sb zero, 0(a1)
+; RV64-NEXT:    addw a0, a0, s0
+; RV64-NEXT:    addiw s1, s1, 1
+; RV64-NEXT:    blt a0, s4, .LBB0_2
+; RV64-NEXT:  .LBB0_3: # %return
+; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 48
+; RV64-NEXT:    ret
+entry:
+  %k_addr.012 = shl i32 %i, 1 ; <i32> [#uses=1]
+  %tmp14 = icmp sgt i32 %k_addr.012, 8192 ; <i1> [#uses=1]
+  br i1 %tmp14, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
+  %tmp. = shl i32 %i, 1 ; <i32> [#uses=1]
+  %tmp.15 = mul i32 %indvar, %i ; <i32> [#uses=1]
+  %tmp.16 = add i32 %tmp.15, %tmp. ; <i32> [#uses=2]
+  %k_addr.0.0 = bitcast i32 %tmp.16 to i32 ; <i32> [#uses=1]
+  %gep.upgrd.1 = zext i32 %tmp.16 to i64 ; <i64> [#uses=1]
+  %tmp = getelementptr [8193 x i8], [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
+  store i8 0, i8* %tmp
+  %k_addr.0 = add i32 %k_addr.0.0, %i ; <i32> [#uses=1]
+  %tmp.upgrd.2 = icmp sgt i32 %k_addr.0, 8192 ; <i1> [#uses=1]
+  %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+  br i1 %tmp.upgrd.2, label %return, label %bb
+
+return: ; preds = %bb, %entry
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-ivusers.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-ivusers.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-ivusers.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32
+; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64
+
+; Test case:
+; - Test `CollectIVUsers`
+
+%struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
+%struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
+%struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
+%struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define fastcc void @outer_loop(%struct.lame_global_flags* nocapture %gfp, double* nocapture %xr, i32 signext %targ_bits, double* nocapture %best_noise, %struct.III_psy_xmin* nocapture %l3_xmin, i32* nocapture %l3_enc, %struct.III_scalefac_t* nocapture %scalefac, %struct.gr_info* nocapture %cod_info, i32 signext %ch) nounwind {
+; RV32-LABEL: outer_loop:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a0, 88
+; RV32-NEXT:    li a1, 168
+; RV32-NEXT:    li a2, 11
+; RV32-NEXT:  .LBB0_1: # %bb28.i37
+; RV32-NEXT:    # =>This Loop Header: Depth=1
+; RV32-NEXT:    # Child Loop BB0_2 Depth 2
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a0
+; RV32-NEXT:  .LBB0_2: # %bb29.i38
+; RV32-NEXT:    # Parent Loop BB0_1 Depth=1
+; RV32-NEXT:    # => This Inner Loop Header: Depth=2
+; RV32-NEXT:    addi a3, a3, 1
+; RV32-NEXT:    addi a5, a5, 12
+; RV32-NEXT:    addi a4, a4, 8
+; RV32-NEXT:    bgeu a2, a3, .LBB0_2
+; RV32-NEXT:  # %bb.3: # %bb28.i37.loopexit
+; RV32-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    addi a1, a1, 168
+; RV32-NEXT:    j .LBB0_1
+;
+; RV64-LABEL: outer_loop:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    li a1, 11
+; RV64-NEXT:  .LBB0_1: # %bb28.i37
+; RV64-NEXT:    # =>This Loop Header: Depth=1
+; RV64-NEXT:    # Child Loop BB0_2 Depth 2
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    addiw a0, a0, 1
+; RV64-NEXT:  .LBB0_2: # %bb29.i38
+; RV64-NEXT:    # Parent Loop BB0_1 Depth=1
+; RV64-NEXT:    # => This Inner Loop Header: Depth=2
+; RV64-NEXT:    addiw a2, a2, 1
+; RV64-NEXT:    bgeu a1, a2, .LBB0_2
+; RV64-NEXT:    j .LBB0_1
+entry:
+  br label %bb4
+
+bb4: ; preds = %bb4, %entry
+  br i1 true, label %bb5, label %bb4
+
+bb5: ; preds = %bb4
+  br i1 true, label %bb28.i37, label %bb.i4
+
+bb.i4: ; preds = %bb.i4, %bb5
+  br label %bb.i4
+
+bb28.i37: ; preds = %bb33.i47, %bb5
+  %i.1.reg2mem.0.i = phi i32 [ %0, %bb33.i47 ], [ 0, %bb5 ] ; <i32> [#uses=2]
+  %0 = add i32 %i.1.reg2mem.0.i, 1 ; <i32> [#uses=2]
+  br label %bb29.i38
+
+bb29.i38: ; preds = %bb33.i47, %bb28.i37
+  %indvar32.i = phi i32 [ %indvar.next33.i, %bb33.i47 ], [ 0, %bb28.i37 ] ; <i32> [#uses=2]
+  %sfb.314.i = add i32 %indvar32.i, 0 ; <i32> [#uses=3]
+  %1 = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i ; <double*> [#uses=1]
+  %2 = load double, double* %1, align 8 ; <double> [#uses=0]
+  br i1 false, label %bb30.i41, label %bb33.i47
+
+bb30.i41: ; preds = %bb29.i38
+  %3 = getelementptr %struct.III_scalefac_t, %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.314.i, i32 %i.1.reg2mem.0.i ; <i32*> [#uses=1]
+  store i32 0, i32* %3, align 4
+  br label %bb33.i47
+
+bb33.i47: ; preds = %bb30.i41, %bb29.i38
+  %4 = add i32 %sfb.314.i, 1 ; <i32> [#uses=1]
+  %phitmp.i46 = icmp ugt i32 %4, 11 ; <i1> [#uses=1]
+  %indvar.next33.i = add i32 %indvar32.i, 1 ; <i32> [#uses=1]
+  br i1 %phitmp.i46, label %bb28.i37, label %bb29.i38
+}
diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32
+; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64
+
+; Test case:
+; - The address of `A[row]` is loop invariant and should be hoisted into the preheader
+; FIXME: RV32 works as expected, but RV64 does not
+
+; The following LLVM IR simulates:
+;   int A[16][16];
+;   void test(int row, int N) {
+;     for (int i = 0; i < N; i++) {
+;       A[row][i+1] = 4;
+;       A[row][i+2] = 5;
+;     }
+;   }
+
+; After LSR:
+;   int A[16][16];
+;   void test(int row, int N) {
+;     int *ptr = &A[row][2];
+;     for (; N > 0; N--) {
+;       *(ptr-1) = 4;
+;       *(ptr) = 5;
+;       ++ptr;
+;     }
+;   }
+
+@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
+
+define void @test(i32 signext %row, i32 signext %N.in) nounwind {
+; RV32-LABEL: test:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    blez a1, .LBB0_3
+; RV32-NEXT:  # %bb.1: # %cond_true.preheader
+; RV32-NEXT:    slli a0, a0, 6
+; RV32-NEXT:    lui a2, %hi(A)
+; RV32-NEXT:    addi a2, a2, %lo(A)
+; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    addi a0, a0, 8
+; RV32-NEXT:    li a2, 4
+; RV32-NEXT:    li a3, 5
+; RV32-NEXT:  .LBB0_2: # %cond_true
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    sw a2, -4(a0)
+; RV32-NEXT:    sw a3, 0(a0)
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    addi a0, a0, 4
+; RV32-NEXT:    bnez a1, .LBB0_2
+; RV32-NEXT:  .LBB0_3: # %return
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: test:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    blez a1, .LBB0_3
+; RV64-NEXT:  # %bb.1: # %cond_true.preheader
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    lui a2, %hi(A)
+; RV64-NEXT:    addi a2, a2, %lo(A)
+; RV64-NEXT:    slli a0, a0, 6
+; RV64-NEXT:    add a0, a0, a2
+; RV64-NEXT:    li a2, 4
+; RV64-NEXT:    li a3, 5
+; RV64-NEXT:  .LBB0_2: # %cond_true
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    addiw a5, a4, 1
+; RV64-NEXT:    slli a6, a5, 2
+; RV64-NEXT:    add a6, a0, a6
+; RV64-NEXT:    sw a2, 0(a6)
+; RV64-NEXT:    addiw a4, a4, 2
+; RV64-NEXT:    slli a4, a4, 2
+; RV64-NEXT:    add a4, a0, a4
+; RV64-NEXT:    sw a3, 0(a4)
+; RV64-NEXT:    mv a4, a5
+; RV64-NEXT:    bne a5, a1, .LBB0_2
+; RV64-NEXT:  .LBB0_3: # %return
+; RV64-NEXT:    ret
entry:
+  %N = bitcast i32 %N.in to i32 ; <i32> [#uses=1]
+  %tmp5 = icmp sgt i32 %N.in, 0 ; <i1> [#uses=1]
+  br i1 %tmp5, label %cond_true, label %return
+
+cond_true: ; preds = %cond_true, %entry
+  %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+  %i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
+  %tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1]
+  %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
+  store i32 4, i32* %tmp
+  %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1]
+  %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
+  store i32 5, i32* %tmp7
+  %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %cond_true
+
+return: ; preds = %cond_true, %entry
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-sink.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-sink.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-sink.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32
+; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64
+
+; Test case:
+; - Test that LSR sinks the immediate portion of the common expression
+;   back into the uses when it fits the addressing modes of all the uses.
+
+%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
+%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
+%struct.bitmap_head_def = type { %struct.bitmap_element*, %struct.bitmap_element*, i32 }
+%struct.branch_path = type { %struct.rtx_def*, i32 }
+%struct.c_lang_decl = type <{ i8, [3 x i8] }>
+%struct.constant_descriptor = type { %struct.constant_descriptor*, i8*, %struct.rtx_def*, { x86_fp80 } }
+%struct.eh_region = type { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, i32, %struct.bitmap_head_def*, i32, { { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, %struct.rtx_def* } }, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+%struct.eh_status = type { %struct.eh_region*, %struct.eh_region**, %struct.eh_region*, %struct.eh_region*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.branch_path*, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %struct.tree_node**, %struct.rtx_def** }
+%struct.equiv_table = type { %struct.rtx_def*, %struct.rtx_def* }
+%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i8, i8, i8 }
+%struct.goto_fixup = type { %struct.goto_fixup*, %struct.rtx_def*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.rtx_def*, %struct.tree_node* }
+%struct.initial_value_struct = type { i32, i32, %struct.equiv_table* }
+%struct.label_chain = type { %struct.label_chain*, %struct.tree_node* }
+%struct.lang_decl = type { %struct.c_lang_decl, %struct.tree_node* }
+%struct.language_function = type { %struct.stmt_tree_s, %struct.tree_node* }
+%struct.machine_function = type { [59 x [3 x %struct.rtx_def*]], i32, i32 }
+%struct.nesting = type { %struct.nesting*, %struct.nesting*, i32, %struct.rtx_def*, { { i32, %struct.rtx_def*, %struct.rtx_def*, %struct.nesting*, %struct.tree_node*, %struct.tree_node*, %struct.label_chain*, i32, i32, i32, i32, %struct.rtx_def*, %struct.tree_node** } } }
+%struct.pool_constant = type { %struct.constant_descriptor*, %struct.pool_constant*, %struct.pool_constant*, %struct.rtx_def*, i32, i32, i32, i64, i32 }
+%struct.rtunion = type { i64 }
+%struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
+%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack* }
+%struct.stmt_status = type { %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, i32, i32, %struct.tree_node*, %struct.rtx_def*, i32, i8*, i32, %struct.goto_fixup* }
+%struct.stmt_tree_s = type { %struct.tree_node*, %struct.tree_node*, i8*, i32 }
+%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %struct.tree_node*, %struct.tree_node*, i8, i8, i32, i32, i64, i64 }
+%struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 }
+%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, { %struct.function* }, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+%struct.tree_exp = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
+%struct.tree_node = type { %struct.tree_decl }
+%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+%struct.varasm_status = type { %struct.constant_descriptor**, %struct.pool_constant**, %struct.pool_constant*, %struct.pool_constant*, i64, %struct.rtx_def* }
+%struct.varray_data = type { [1 x i64] }
+%struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data }
+@lineno = internal global i32 0 ; <i32*> [#uses=1]
+@tree_code_length = internal global [256 x i32] zeroinitializer
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.tree_node* (i32, ...)* @build_stmt to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define %struct.tree_node* @build_stmt(i32 %code, ...) nounwind {
+; RV32-LABEL: build_stmt:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -48
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    sw a7, 44(sp)
+; RV32-NEXT:    sw a6, 40(sp)
+; RV32-NEXT:    sw a5, 36(sp)
+; RV32-NEXT:    sw a4, 32(sp)
+; RV32-NEXT:    sw a3, 28(sp)
+; RV32-NEXT:    sw a2, 24(sp)
+; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    addi a0, sp, 20
+; RV32-NEXT:    sw a0, 4(sp)
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    call make_node@plt
+; RV32-NEXT:    lui a1, %hi(tree_code_length)
+; RV32-NEXT:    addi a1, a1, %lo(tree_code_length)
+; RV32-NEXT:    slli a2, s0, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    lui a2, %hi(lineno)
+; RV32-NEXT:    lw a2, %lo(lineno)(a2)
+; RV32-NEXT:    sw a2, 12(a0)
+; RV32-NEXT:    blez a1, .LBB0_3
+; RV32-NEXT:  # %bb.1: # %bb.preheader
+; RV32-NEXT:    addi a2, a0, 16
+; RV32-NEXT:  .LBB0_2: # %bb
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    lw a3, 4(sp)
+; RV32-NEXT:    addi a4, a3, 4
+; RV32-NEXT:    sw a4, 4(sp)
+; RV32-NEXT:    lw a3, 0(a3)
+; RV32-NEXT:    sw a3, 0(a2)
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    addi a2, a2, 4
+; RV32-NEXT:    bnez a1, .LBB0_2
+; RV32-NEXT:  .LBB0_3: # %bb3
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: build_stmt:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -96
+; RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-NEXT:    mv s0, a0
+; RV64-NEXT:    sd a7, 88(sp)
+; RV64-NEXT:    sd a6, 80(sp)
+; RV64-NEXT:    sd a5, 72(sp)
+; RV64-NEXT:    sd a4, 64(sp)
+; RV64-NEXT:    sd a3, 56(sp)
+; RV64-NEXT:    sd a2, 48(sp)
+; RV64-NEXT:    sd a1, 40(sp)
+; RV64-NEXT:    addi a0, sp, 40
+; RV64-NEXT:    sd a0, 8(sp)
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:    call make_node@plt
+; RV64-NEXT:    lui a1, %hi(tree_code_length)
+; RV64-NEXT:    addi a1, a1, %lo(tree_code_length)
+; RV64-NEXT:    sext.w a2, s0
+; RV64-NEXT:    slli a2, a2, 2
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    lw a1, 0(a1)
+; RV64-NEXT:    lui a2, %hi(lineno)
+; RV64-NEXT:    lw a2, %lo(lineno)(a2)
+; RV64-NEXT:    sw a2, 24(a0)
+; RV64-NEXT:    blez a1, .LBB0_3
+; RV64-NEXT:  # %bb.1: # %bb.preheader
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:  .LBB0_2: # %bb
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    ld a3, 8(sp)
+; RV64-NEXT:    addi a4, a3, 4
+; RV64-NEXT:    sd a4, 8(sp)
+; RV64-NEXT:    lwu a4, 4(a3)
+; RV64-NEXT:    lwu a3, 0(a3)
+; RV64-NEXT:    slli a5, a2, 3
+; RV64-NEXT:    add a5, a0, a5
+; RV64-NEXT:    sw a4, 36(a5)
+; RV64-NEXT:    addiw a2, a2, 1
+; RV64-NEXT:    sw a3, 32(a5)
+; RV64-NEXT:    bne a2, a1, .LBB0_2
+; RV64-NEXT:  .LBB0_3: # %bb3
+; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 96
+; RV64-NEXT:    ret
entry:
+  %p = alloca i8* ; <i8**> [#uses=3]
+  %p1 = bitcast i8** %p to i8* ; <i8*> [#uses=2]
+  call void @llvm.va_start(i8* %p1)
+  %0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind ; <%struct.tree_node*> [#uses=2]
+  %1 = getelementptr [256 x i32], [256 x i32]* @tree_code_length, i32 0, i32 %code ; <i32*> [#uses=1]
+  %2 = load i32, i32* %1, align 4 ; <i32> [#uses=2]
+  %3 = load i32, i32* @lineno, align 4 ; <i32> [#uses=1]
+  %4 = bitcast %struct.tree_node* %0 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=2]
+  %5 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 1 ; <i32*> [#uses=1]
+  store i32 %3, i32* %5, align 4
+  %6 = icmp sgt i32 %2, 0 ; <i1> [#uses=1]
+  br i1 %6, label %bb, label %bb3
+
+bb: ; preds = %bb, %entry
+  %i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] ; <i32> [#uses=2]
+  %7 = load i8*, i8** %p, align 4 ; <i8*> [#uses=2]
+  %8 = getelementptr i8, i8* %7, i32 4 ; <i8*> [#uses=1]
+  store i8* %8, i8** %p, align 4
+  %9 = bitcast i8* %7 to %struct.tree_node** ; <%struct.tree_node**> [#uses=1]
+  %10 = load %struct.tree_node*, %struct.tree_node** %9, align 4 ; <%struct.tree_node*> [#uses=1]
+  %11 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01 ; <%struct.tree_node**> [#uses=1]
+  store %struct.tree_node* %10, %struct.tree_node** %11, align 4
+  %indvar.next = add i32 %i.01, 1 ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %2 ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb3, label %bb
+
+bb3: ; preds = %bb, %entry
+  call void @llvm.va_end(i8* %p1)
+  ret %struct.tree_node* %0
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare fastcc %struct.tree_node* @make_node(i32) nounwind