diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-2.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-2.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +; Make sure the common loop invariant A is hoisted up to preheader, +; since too many registers are needed to subsume it into the addressing modes. +; It's safe to sink A in when it's not pic. + +@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 %row, i32 %N.in) nounwind { +; RV32-LABEL: test: +; RV32: # %bb.0: # %entry +; RV32-NEXT: blez a1, .LBB0_3 +; RV32-NEXT: # %bb.1: # %cond_true.preheader +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: lui a2, %hi(A) +; RV32-NEXT: addi a2, a2, %lo(A) +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: addi a0, a0, 8 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li a3, 5 +; RV32-NEXT: .LBB0_2: # %cond_true +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: sw a2, -4(a0) +; RV32-NEXT: sw a3, 0(a0) +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: addi a0, a0, 4 +; RV32-NEXT: bnez a1, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: test: +; RV64: # %bb.0: # %entry +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: blez a1, .LBB0_3 +; RV64-NEXT: # %bb.1: # %cond_true.preheader +; RV64-NEXT: li a4, 0 +; RV64-NEXT: lui a2, %hi(A) +; RV64-NEXT: addi a2, a2, %lo(A) +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: li a3, 5 +; RV64-NEXT: .LBB0_2: # %cond_true +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: addiw a5, a4, 1 +; RV64-NEXT: slli a6, a5, 2 +; RV64-NEXT: add a6, a0, a6 +; RV64-NEXT: sw a2, 
0(a6) +; RV64-NEXT: addiw a4, a4, 2 +; RV64-NEXT: slli a4, a4, 2 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: sw a3, 0(a4) +; RV64-NEXT: mv a4, a5 +; RV64-NEXT: bne a5, a1, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ret +entry: + %N = bitcast i32 %N.in to i32 ; [#uses=1] + %tmp5 = icmp sgt i32 %N.in, 0 ; [#uses=1] + br i1 %tmp5, label %cond_true, label %return + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; [#uses=2] + %tmp2 = add i32 %i.0.0, 1 ; [#uses=1] + %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; [#uses=1] + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; [#uses=1] + %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; [#uses=1] + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true, %entry + ret void +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-3.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-3.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 %row, i32 %N.in) nounwind { +; RV32-LABEL: test: +; RV32: # %bb.0: # %entry +; RV32-NEXT: blez a1, .LBB0_3 +; RV32-NEXT: # %bb.1: # %cond_true.preheader +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: lui a2, %hi(A) +; RV32-NEXT: addi a2, a2, %lo(A) +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: 
addi a0, a0, 8 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li a3, 5 +; RV32-NEXT: .LBB0_2: # %cond_true +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: sw a2, -4(a0) +; RV32-NEXT: sw a3, 0(a0) +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: addi a0, a0, 4 +; RV32-NEXT: bnez a1, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: test: +; RV64: # %bb.0: # %entry +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: blez a1, .LBB0_3 +; RV64-NEXT: # %bb.1: # %cond_true.preheader +; RV64-NEXT: li a4, 0 +; RV64-NEXT: lui a2, %hi(A) +; RV64-NEXT: addi a2, a2, %lo(A) +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, a2, a0 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: li a3, 5 +; RV64-NEXT: .LBB0_2: # %cond_true +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: addiw a5, a4, 1 +; RV64-NEXT: slli a6, a5, 2 +; RV64-NEXT: add a6, a0, a6 +; RV64-NEXT: sw a2, 0(a6) +; RV64-NEXT: addiw a4, a4, 2 +; RV64-NEXT: slli a4, a4, 2 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: sw a3, 0(a4) +; RV64-NEXT: mv a4, a5 +; RV64-NEXT: bne a5, a1, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ret +entry: + %N = bitcast i32 %N.in to i32 ; [#uses=1] + %tmp5 = icmp sgt i32 %N.in, 0 ; [#uses=1] + br i1 %tmp5, label %cond_true, label %return + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; [#uses=2] + %tmp2 = add i32 %i.0.0, 1 ; [#uses=1] + %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; [#uses=1] + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; [#uses=1] + %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; [#uses=1] + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true, 
%entry + ret void +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +@A = internal global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2] + +define void @test(i32 %row, i32 %N.in) nounwind { +; RV32-LABEL: test: +; RV32: # %bb.0: # %entry +; RV32-NEXT: blez a1, .LBB0_3 +; RV32-NEXT: # %bb.1: # %cond_true.preheader +; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: lui a2, %hi(A) +; RV32-NEXT: addi a2, a2, %lo(A) +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: addi a0, a0, 8 +; RV32-NEXT: li a2, 4 +; RV32-NEXT: li a3, 5 +; RV32-NEXT: .LBB0_2: # %cond_true +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: sw a2, -4(a0) +; RV32-NEXT: sw a3, 0(a0) +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: addi a0, a0, 4 +; RV32-NEXT: bnez a1, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: test: +; RV64: # %bb.0: # %entry +; RV64-NEXT: sext.w a1, a1 +; RV64-NEXT: blez a1, .LBB0_3 +; RV64-NEXT: # %bb.1: # %cond_true.preheader +; RV64-NEXT: li a4, 0 +; RV64-NEXT: lui a2, %hi(A) +; RV64-NEXT: addi a2, a2, %lo(A) +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slli a0, a0, 6 +; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: li a2, 4 +; RV64-NEXT: li a3, 5 +; RV64-NEXT: .LBB0_2: # %cond_true +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: addiw a5, a4, 1 +; RV64-NEXT: slli a6, a5, 2 +; RV64-NEXT: add a6, a0, a6 +; RV64-NEXT: sw a2, 0(a6) +; RV64-NEXT: addiw a4, a4, 2 +; RV64-NEXT: slli a4, a4, 2 +; RV64-NEXT: add a4, a0, a4 +; RV64-NEXT: sw a3, 0(a4) +; RV64-NEXT: mv a4, a5 +; RV64-NEXT: bne a5, a1, 
.LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ret +entry: + %N = bitcast i32 %N.in to i32 ; [#uses=1] + %tmp5 = icmp sgt i32 %N.in, 0 ; [#uses=1] + br i1 %tmp5, label %cond_true, label %return + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; [#uses=2] + %i.0.0 = bitcast i32 %indvar to i32 ; [#uses=2] + %tmp2 = add i32 %i.0.0, 1 ; [#uses=1] + %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; [#uses=1] + store i32 4, i32* %tmp + %tmp5.upgrd.1 = add i32 %i.0.0, 2 ; [#uses=1] + %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; [#uses=1] + store i32 5, i32* %tmp7 + %indvar.next = add i32 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; [#uses=1] + br i1 %exitcond, label %return, label %cond_true + +return: ; preds = %cond_true, %entry + ret void +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce2.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce2.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 +; +; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader. 
+ +; This port is not built as PIC; the x86 PIC-label check does not apply here. + +@flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1] + +define void @test(i32 %k, i32 %i) nounwind { +; RV32-LABEL: test: +; RV32: # %bb.0: # %entry +; RV32-NEXT: slli a0, a1, 1 +; RV32-NEXT: lui a3, 2 +; RV32-NEXT: blt a3, a0, .LBB0_3 +; RV32-NEXT: # %bb.1: # %bb.preheader +; RV32-NEXT: lui a2, %hi(flags2) +; RV32-NEXT: addi a2, a2, %lo(flags2) +; RV32-NEXT: addi a3, a3, 1 +; RV32-NEXT: .LBB0_2: # %bb +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: add a4, a0, a2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: sb zero, 0(a4) +; RV32-NEXT: blt a0, a3, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: test: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: slliw s2, a1, 1 +; RV64-NEXT: lui a0, 2 +; RV64-NEXT: blt a0, s2, .LBB0_3 +; RV64-NEXT: # %bb.1: # %bb.preheader +; RV64-NEXT: mv s0, a1 +; RV64-NEXT: li s1, 0 +; RV64-NEXT: lui a1, %hi(flags2) +; RV64-NEXT: addi s3, a1, %lo(flags2) +; RV64-NEXT: addiw s4, a0, 1 +; RV64-NEXT: .LBB0_2: # %bb +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: mv a0, s1 +; RV64-NEXT: mv a1, s0 +; RV64-NEXT: call __muldi3@plt +; RV64-NEXT: addw a0, a0, s2 +; RV64-NEXT: slli a1, a0, 32 +; RV64-NEXT: srli a1, a1, 32 +; RV64-NEXT: add a1, a1, s3 +; RV64-NEXT: sb zero, 0(a1) +; RV64-NEXT: addw a0, a0, s0 +; RV64-NEXT: addiw s1, s1, 1 +; RV64-NEXT: blt a0, s4, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; 
RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: ret +entry: + %k_addr.012 = shl i32 %i, 1 ; [#uses=1] + %tmp14 = icmp sgt i32 %k_addr.012, 8192 ; [#uses=1] + br i1 %tmp14, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=2] + %tmp. = shl i32 %i, 1 ; [#uses=1] + %tmp.15 = mul i32 %indvar, %i ; [#uses=1] + %tmp.16 = add i32 %tmp.15, %tmp. ; [#uses=2] + %k_addr.0.0 = bitcast i32 %tmp.16 to i32 ; [#uses=1] + %gep.upgrd.1 = zext i32 %tmp.16 to i64 ; [#uses=1] + %tmp = getelementptr [8193 x i8], [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1 ; [#uses=1] + store i8 0, i8* %tmp + %k_addr.0 = add i32 %k_addr.0.0, %i ; [#uses=1] + %tmp.upgrd.2 = icmp sgt i32 %k_addr.0, 8192 ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %tmp.upgrd.2, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce4.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce4.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce4.ll @@ -0,0 +1,139 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +; This test was ported from X86, where starting the IV at -64 instead of 0 +; eliminated a cmp by reusing the add's flags; RISC-V has no flags register. 
+ + +@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] +@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] + +define i32 @foo() nounwind { +; RV32-LABEL: foo: +; RV32: # %bb.0: # %entry +; RV32-NEXT: li a0, 0 +; RV32-NEXT: li a4, 0 +; RV32-NEXT: lui a1, %hi(state) +; RV32-NEXT: addi a1, a1, %lo(state) +; RV32-NEXT: lui a2, %hi(S) +; RV32-NEXT: addi a2, a2, %lo(S) +; RV32-NEXT: li a3, 64 +; RV32-NEXT: .LBB0_1: # %bb +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: add a5, a1, a0 +; RV32-NEXT: lw a6, 0(a5) +; RV32-NEXT: slli a4, a4, 2 +; RV32-NEXT: add a4, a2, a4 +; RV32-NEXT: lw a4, 0(a4) +; RV32-NEXT: xor a4, a4, a6 +; RV32-NEXT: lw a6, 4(a5) +; RV32-NEXT: slli a7, a4, 2 +; RV32-NEXT: add a7, a2, a7 +; RV32-NEXT: lw a7, 0(a7) +; RV32-NEXT: xor a6, a7, a6 +; RV32-NEXT: lw a7, 8(a5) +; RV32-NEXT: slli t0, a6, 2 +; RV32-NEXT: add t0, a2, t0 +; RV32-NEXT: lw t0, 0(t0) +; RV32-NEXT: xor a7, t0, a7 +; RV32-NEXT: lw t0, 12(a5) +; RV32-NEXT: slli t1, a7, 2 +; RV32-NEXT: add t1, a2, t1 +; RV32-NEXT: lw t1, 0(t1) +; RV32-NEXT: sw a4, 0(a5) +; RV32-NEXT: sw a6, 4(a5) +; RV32-NEXT: sw a7, 8(a5) +; RV32-NEXT: xor a4, t1, t0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: sw a4, 12(a5) +; RV32-NEXT: bne a0, a3, .LBB0_1 +; RV32-NEXT: # %bb.2: # %bb57 +; RV32-NEXT: andi a0, a4, 255 +; RV32-NEXT: ret +; +; RV64-LABEL: foo: +; RV64: # %bb.0: # %entry +; RV64-NEXT: li a0, 0 +; RV64-NEXT: li a4, 0 +; RV64-NEXT: lui a1, %hi(state) +; RV64-NEXT: addi a1, a1, %lo(state) +; RV64-NEXT: lui a2, %hi(S) +; RV64-NEXT: addi a2, a2, %lo(S) +; RV64-NEXT: li a3, 4 +; RV64-NEXT: .LBB0_1: # %bb +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: slliw a5, a0, 2 +; RV64-NEXT: slli a5, a5, 2 +; RV64-NEXT: add a5, a1, a5 +; RV64-NEXT: lw a6, 0(a5) +; RV64-NEXT: slli a4, a4, 2 +; RV64-NEXT: add a4, a2, a4 +; RV64-NEXT: lw a4, 0(a4) +; RV64-NEXT: xor a4, a4, a6 +; RV64-NEXT: lw a6, 4(a5) +; RV64-NEXT: slli a7, a4, 2 +; RV64-NEXT: add a7, a2, a7 +; RV64-NEXT: lw a7, 
0(a7) +; RV64-NEXT: xor a6, a7, a6 +; RV64-NEXT: lw a7, 8(a5) +; RV64-NEXT: slli t0, a6, 2 +; RV64-NEXT: add t0, a2, t0 +; RV64-NEXT: lw t0, 0(t0) +; RV64-NEXT: xor a7, t0, a7 +; RV64-NEXT: lw t0, 12(a5) +; RV64-NEXT: slli t1, a7, 2 +; RV64-NEXT: add t1, a2, t1 +; RV64-NEXT: lw t1, 0(t1) +; RV64-NEXT: sw a4, 0(a5) +; RV64-NEXT: sw a6, 4(a5) +; RV64-NEXT: sw a7, 8(a5) +; RV64-NEXT: xor a4, t1, t0 +; RV64-NEXT: addiw a0, a0, 1 +; RV64-NEXT: sw a4, 12(a5) +; RV64-NEXT: bne a0, a3, .LBB0_1 +; RV64-NEXT: # %bb.2: # %bb57 +; RV64-NEXT: andi a0, a4, 255 +; RV64-NEXT: ret +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=2] + %t.063.0 = phi i32 [ 0, %entry ], [ %tmp47, %bb ] ; [#uses=1] + %j.065.0 = shl i32 %indvar, 2 ; [#uses=4] + %tmp3 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %j.065.0 ; [#uses=2] + %tmp4 = load i32, i32* %tmp3, align 4 ; [#uses=1] + %tmp6 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %t.063.0 ; [#uses=1] + %tmp7 = load i32, i32* %tmp6, align 4 ; [#uses=1] + %tmp8 = xor i32 %tmp7, %tmp4 ; [#uses=2] + store i32 %tmp8, i32* %tmp3, align 4 + %tmp1378 = or i32 %j.065.0, 1 ; [#uses=1] + %tmp16 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp1378 ; [#uses=2] + %tmp17 = load i32, i32* %tmp16, align 4 ; [#uses=1] + %tmp19 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp8 ; [#uses=1] + %tmp20 = load i32, i32* %tmp19, align 4 ; [#uses=1] + %tmp21 = xor i32 %tmp20, %tmp17 ; [#uses=2] + store i32 %tmp21, i32* %tmp16, align 4 + %tmp2680 = or i32 %j.065.0, 2 ; [#uses=1] + %tmp29 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp2680 ; [#uses=2] + %tmp30 = load i32, i32* %tmp29, align 4 ; [#uses=1] + %tmp32 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp21 ; [#uses=1] + %tmp33 = load i32, i32* %tmp32, align 4 ; [#uses=1] + %tmp34 = xor i32 %tmp33, %tmp30 ; [#uses=2] + store i32 %tmp34, i32* %tmp29, align 4 + %tmp3982 = or i32 %j.065.0, 3 
; [#uses=1] + %tmp42 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp3982 ; [#uses=2] + %tmp43 = load i32, i32* %tmp42, align 4 ; [#uses=1] + %tmp45 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp34 ; [#uses=1] + %tmp46 = load i32, i32* %tmp45, align 4 ; [#uses=1] + %tmp47 = xor i32 %tmp46, %tmp43 ; [#uses=3] + store i32 %tmp47, i32* %tmp42, align 4 + %indvar.next = add i32 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, 4 ; [#uses=1] + br i1 %exitcond, label %bb57, label %bb + +bb57: ; preds = %bb + %tmp59 = and i32 %tmp47, 255 ; [#uses=1] + ret i32 %tmp59 +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce5.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce5.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +@X = weak global i16 0 ; [#uses=1] +@Y = weak global i16 0 ; [#uses=1] + +define void @foo(i32 %N) nounwind { +; RV32-LABEL: foo: +; RV32: # %bb.0: # %entry +; RV32-NEXT: blez a0, .LBB0_3 +; RV32-NEXT: # %bb.1: # %bb.preheader +; RV32-NEXT: li a1, 0 +; RV32-NEXT: lui a2, %hi(X) +; RV32-NEXT: lui a3, %hi(Y) +; RV32-NEXT: .LBB0_2: # %bb +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: sh a1, %lo(X)(a2) +; RV32-NEXT: slli a4, a1, 2 +; RV32-NEXT: addi a1, a1, 1 +; RV32-NEXT: sh a4, %lo(Y)(a3) +; RV32-NEXT: bne a0, a1, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %return +; RV32-NEXT: ret +; +; RV64-LABEL: foo: +; RV64: # %bb.0: # %entry +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: blez a0, .LBB0_3 +; RV64-NEXT: # %bb.1: # %bb.preheader +; RV64-NEXT: li a1, 0 +; RV64-NEXT: lui a2, %hi(X) +; RV64-NEXT: lui a3, %hi(Y) +; RV64-NEXT: .LBB0_2: # %bb +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; 
RV64-NEXT: sh a1, %lo(X)(a2) +; RV64-NEXT: slliw a4, a1, 2 +; RV64-NEXT: addiw a1, a1, 1 +; RV64-NEXT: sh a4, %lo(Y)(a3) +; RV64-NEXT: bne a1, a0, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %return +; RV64-NEXT: ret +entry: + %tmp1019 = icmp sgt i32 %N, 0 ; [#uses=1] + br i1 %tmp1019, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=2] + %tmp1 = trunc i32 %i.014.0 to i16 ; [#uses=2] + store volatile i16 %tmp1, i16* @X, align 2 + %tmp34 = shl i16 %tmp1, 2 ; [#uses=1] + store volatile i16 %tmp34, i16* @Y, align 2 + %indvar.next = add i32 %i.014.0, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %N ; [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce6.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce6.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce6.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +define fastcc i32 @decodeMP3(i32 %isize, i32* %done) nounwind { +; RV32-LABEL: decodeMP3: +; RV32: # %bb.0: # %entry +; RV32-NEXT: li a0, 0 +; RV32-NEXT: ret +; +; RV64-LABEL: decodeMP3: +; RV64: # %bb.0: # %entry +; RV64-NEXT: li a0, 0 +; RV64-NEXT: ret +entry: + br label %cond_true189 + +cond_true189: ; preds = %entry + ret i32 0 + +cond_next191: ; preds = %entry + br label %cond_false.i9 + +cond_false.i9: ; preds = %cond_next191 + ret i32 0 + +cond_next37.i: ; preds = %cond_next191 + br label %cond_true44.i + +cond_true44.i: ; preds = %cond_next37.i + br label %bb414.preheader.i + +cond_true11.i.i: ; preds = %cond_true44.i + ret i32 0 + +cond_false50.i: ; preds = %cond_next37.i + ret i32 0 + +bb414.preheader.i: ; preds = 
%cond_true44.i + br label %do_layer3.exit + +bb.i18: ; preds = %bb414.preheader.i + br label %cond_true79.i + +cond_true79.i: ; preds = %bb.i18 + ret i32 0 + +bb331.i: ; preds = %bb358.i, %cond_true.i149.i + br label %cond_false.i151.i + +cond_true.i149.i: ; preds = %bb331.i + br label %bb331.i + +cond_false.i151.i: ; preds = %bb331.i + ret i32 0 + +bb163.i.i: ; preds = %bb178.preheader.i.i, %bb163.i.i + %rawout2.451.rec.i.i = phi i64 [ 0, %bb178.preheader.i.i ], [ %indvar.next260.i, %bb163.i.i ] ; [#uses=2] + %i.052.i.i = trunc i64 %rawout2.451.rec.i.i to i32 ; [#uses=1] + %tmp165.i144.i = shl i32 %i.052.i.i, 5 ; [#uses=1] + %tmp165169.i.i = sext i32 %tmp165.i144.i to i64 ; [#uses=0] + %indvar.next260.i = add i64 %rawout2.451.rec.i.i, 1 ; [#uses=2] + %exitcond261.i = icmp eq i64 %indvar.next260.i, 18 ; [#uses=1] + br i1 %exitcond261.i, label %bb178.preheader.i.i, label %bb163.i.i + +bb178.preheader.i.i: ; preds = %bb163.i.i, %cond_true.i149.i + br label %bb163.i.i + +bb358.i: ; preds = %bb.i18 + br label %bb406.i + +bb406.i: ; preds = %bb358.i + ret i32 0 + +do_layer3.exit: ; preds = %bb414.preheader.i + ret i32 0 +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce7.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce7.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce7.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + + %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] } + %struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] } + %struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] } + %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 } + +define fastcc void @outer_loop(%struct.lame_global_flags* nocapture %gfp, double* nocapture %xr, i32 %targ_bits, double* nocapture %best_noise, %struct.III_psy_xmin* nocapture %l3_xmin, i32* nocapture %l3_enc, %struct.III_scalefac_t* nocapture %scalefac, %struct.gr_info* nocapture %cod_info, i32 %ch) nounwind { +; RV32-LABEL: outer_loop: +; RV32: # %bb.0: # %entry +; RV32-NEXT: li a0, 88 +; RV32-NEXT: li a1, 168 +; RV32-NEXT: li a2, 11 +; RV32-NEXT: .LBB0_1: # %bb28.i37 +; RV32-NEXT: # =>This Loop Header: Depth=1 +; RV32-NEXT: # Child Loop BB0_2 Depth 2 +; RV32-NEXT: li a3, 0 +; RV32-NEXT: mv a4, a1 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: .LBB0_2: # %bb29.i38 +; RV32-NEXT: # Parent Loop BB0_1 Depth=1 +; RV32-NEXT: # => This Inner Loop Header: Depth=2 +; RV32-NEXT: addi a3, a3, 1 +; RV32-NEXT: addi a5, a5, 12 +; RV32-NEXT: addi a4, a4, 8 +; RV32-NEXT: bgeu a2, a3, .LBB0_2 +; RV32-NEXT: # %bb.3: # %bb28.i37.loopexit +; RV32-NEXT: # in Loop: Header=BB0_1 Depth=1 +; RV32-NEXT: addi a0, a0, 4 +; RV32-NEXT: addi a1, a1, 168 +; RV32-NEXT: j .LBB0_1 +; +; RV64-LABEL: outer_loop: +; RV64: # %bb.0: # %entry +; RV64-NEXT: li a0, 0 +; RV64-NEXT: li a1, 11 +; RV64-NEXT: .LBB0_1: # %bb28.i37 +; RV64-NEXT: # =>This Loop Header: Depth=1 +; RV64-NEXT: # Child Loop BB0_2 Depth 2 +; RV64-NEXT: li a2, 0 +; RV64-NEXT: addiw a0, a0, 1 +; RV64-NEXT: .LBB0_2: # %bb29.i38 +; RV64-NEXT: # Parent Loop BB0_1 Depth=1 +; RV64-NEXT: # => This Inner Loop Header: Depth=2 +; RV64-NEXT: addiw a2, a2, 1 +; RV64-NEXT: bgeu a1, a2, .LBB0_2 +; RV64-NEXT: j .LBB0_1 +entry: + br label %bb4 + +bb4: ; preds = %bb4, %entry + br i1 true, label %bb5, label %bb4 + +bb5: ; preds = %bb4 + br i1 true, label %bb28.i37, label %bb.i4 + 
+bb.i4: ; preds = %bb.i4, %bb5 + br label %bb.i4 + +bb28.i37: ; preds = %bb33.i47, %bb5 + %i.1.reg2mem.0.i = phi i32 [ %0, %bb33.i47 ], [ 0, %bb5 ] ; [#uses=2] + %0 = add i32 %i.1.reg2mem.0.i, 1 ; [#uses=2] + br label %bb29.i38 + +bb29.i38: ; preds = %bb33.i47, %bb28.i37 + %indvar32.i = phi i32 [ %indvar.next33.i, %bb33.i47 ], [ 0, %bb28.i37 ] ; [#uses=2] + %sfb.314.i = add i32 %indvar32.i, 0 ; [#uses=3] + %1 = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i ; [#uses=1] + %2 = load double, double* %1, align 8 ; [#uses=0] + br i1 false, label %bb30.i41, label %bb33.i47 + +bb30.i41: ; preds = %bb29.i38 + %3 = getelementptr %struct.III_scalefac_t, %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.314.i, i32 %i.1.reg2mem.0.i ; [#uses=1] + store i32 0, i32* %3, align 4 + br label %bb33.i47 + +bb33.i47: ; preds = %bb30.i41, %bb29.i38 + %4 = add i32 %sfb.314.i, 1 ; [#uses=1] + %phitmp.i46 = icmp ugt i32 %4, 11 ; [#uses=1] + %indvar.next33.i = add i32 %indvar32.i, 1 ; [#uses=1] + br i1 %phitmp.i46, label %bb28.i37, label %bb29.i38 +} diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce8.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce8.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce8.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -verify-machineinstrs | FileCheck %s -check-prefixes=RV32 +; RUN: llc < %s -mtriple=riscv64 -verify-machineinstrs | FileCheck %s -check-prefixes=RV64 + +; The FIXME in the original X86 test (PR20776: fold movl+addl into leal) is +; x86-specific and does not apply to the RISC-V output checked below. 
+ + %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 } + %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] } + %struct.bitmap_head_def = type { %struct.bitmap_element*, %struct.bitmap_element*, i32 } + %struct.branch_path = type { %struct.rtx_def*, i32 } + %struct.c_lang_decl = type <{ i8, [3 x i8] }> + %struct.constant_descriptor = type { %struct.constant_descriptor*, i8*, %struct.rtx_def*, { x86_fp80 } } + %struct.eh_region = type { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, i32, %struct.bitmap_head_def*, i32, { { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, %struct.rtx_def* } }, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.eh_status = type { %struct.eh_region*, %struct.eh_region**, %struct.eh_region*, %struct.eh_region*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.branch_path*, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %struct.tree_node**, %struct.rtx_def** } + %struct.equiv_table = type { %struct.rtx_def*, %struct.rtx_def* } + %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* } + %struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, 
%struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i8, i8, i8 } + %struct.goto_fixup = type { %struct.goto_fixup*, %struct.rtx_def*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.rtx_def*, %struct.tree_node* } + %struct.initial_value_struct = type { i32, i32, %struct.equiv_table* } + %struct.label_chain = type { %struct.label_chain*, %struct.tree_node* } + %struct.lang_decl = type { %struct.c_lang_decl, %struct.tree_node* } + %struct.language_function = type { %struct.stmt_tree_s, %struct.tree_node* } + %struct.machine_function = type { [59 x [3 x %struct.rtx_def*]], i32, i32 } + %struct.nesting = type { %struct.nesting*, %struct.nesting*, i32, %struct.rtx_def*, { { i32, %struct.rtx_def*, %struct.rtx_def*, %struct.nesting*, %struct.tree_node*, %struct.tree_node*, %struct.label_chain*, i32, i32, i32, i32, %struct.rtx_def*, %struct.tree_node** } } } + %struct.pool_constant = type { %struct.constant_descriptor*, %struct.pool_constant*, %struct.pool_constant*, %struct.rtx_def*, i32, i32, i32, i64, i32 } + %struct.rtunion = type { i64 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] } + %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack* } + %struct.stmt_status = type { %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, i32, i32, %struct.tree_node*, %struct.rtx_def*, i32, i8*, i32, %struct.goto_fixup* } + %struct.stmt_tree_s = type { %struct.tree_node*, %struct.tree_node*, i8*, i32 } + %struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, 
%struct.tree_node*, %struct.tree_node*, i8, i8, i32, i32, i64, i64 } + %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 } + %struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, { %struct.function* }, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* } + %struct.tree_exp = type { %struct.tree_common, i32, [1 x %struct.tree_node*] } + %struct.tree_node = type { %struct.tree_decl } + %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* } + %struct.varasm_status = type { %struct.constant_descriptor**, %struct.pool_constant**, %struct.pool_constant*, %struct.pool_constant*, i64, %struct.rtx_def* } + %struct.varray_data = type { [1 x i64] } + %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data } +@lineno = internal global i32 0 ; [#uses=1] +@tree_code_length = internal global [256 x i32] zeroinitializer +@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.tree_node* (i32, ...)* @build_stmt to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define %struct.tree_node* @build_stmt(i32 %code, ...) 
nounwind { +; RV32-LABEL: build_stmt: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: sw a6, 40(sp) +; RV32-NEXT: sw a5, 36(sp) +; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: mv a0, s0 +; RV32-NEXT: call make_node@plt +; RV32-NEXT: lui a1, %hi(tree_code_length) +; RV32-NEXT: addi a1, a1, %lo(tree_code_length) +; RV32-NEXT: slli a2, s0, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lui a2, %hi(lineno) +; RV32-NEXT: lw a2, %lo(lineno)(a2) +; RV32-NEXT: sw a2, 12(a0) +; RV32-NEXT: blez a1, .LBB0_3 +; RV32-NEXT: # %bb.1: # %bb.preheader +; RV32-NEXT: addi a2, a0, 16 +; RV32-NEXT: .LBB0_2: # %bb +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: lw a3, 4(sp) +; RV32-NEXT: addi a4, a3, 4 +; RV32-NEXT: sw a4, 4(sp) +; RV32-NEXT: lw a3, 0(a3) +; RV32-NEXT: sw a3, 0(a2) +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: addi a2, a2, 4 +; RV32-NEXT: bnez a1, .LBB0_2 +; RV32-NEXT: .LBB0_3: # %bb3 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: build_stmt: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi sp, sp, -96 +; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: mv s0, a0 +; RV64-NEXT: sd a7, 88(sp) +; RV64-NEXT: sd a6, 80(sp) +; RV64-NEXT: sd a5, 72(sp) +; RV64-NEXT: sd a4, 64(sp) +; RV64-NEXT: sd a3, 56(sp) +; RV64-NEXT: sd a2, 48(sp) +; RV64-NEXT: sd a1, 40(sp) +; RV64-NEXT: addi a0, sp, 40 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call make_node@plt +; RV64-NEXT: lui a1, %hi(tree_code_length) +; RV64-NEXT: addi a1, a1, 
%lo(tree_code_length) +; RV64-NEXT: sext.w a2, s0 +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: lw a1, 0(a1) +; RV64-NEXT: lui a2, %hi(lineno) +; RV64-NEXT: lw a2, %lo(lineno)(a2) +; RV64-NEXT: sw a2, 24(a0) +; RV64-NEXT: blez a1, .LBB0_3 +; RV64-NEXT: # %bb.1: # %bb.preheader +; RV64-NEXT: li a2, 0 +; RV64-NEXT: .LBB0_2: # %bb +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: ld a3, 8(sp) +; RV64-NEXT: addi a4, a3, 4 +; RV64-NEXT: sd a4, 8(sp) +; RV64-NEXT: lwu a4, 4(a3) +; RV64-NEXT: lwu a3, 0(a3) +; RV64-NEXT: slli a5, a2, 3 +; RV64-NEXT: add a5, a0, a5 +; RV64-NEXT: sw a4, 36(a5) +; RV64-NEXT: addiw a2, a2, 1 +; RV64-NEXT: sw a3, 32(a5) +; RV64-NEXT: bne a2, a1, .LBB0_2 +; RV64-NEXT: .LBB0_3: # %bb3 +; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 96 +; RV64-NEXT: ret +entry: + %p = alloca i8* ; [#uses=3] + %p1 = bitcast i8** %p to i8* ; [#uses=2] + call void @llvm.va_start(i8* %p1) + %0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind ; <%struct.tree_node*> [#uses=2] + %1 = getelementptr [256 x i32], [256 x i32]* @tree_code_length, i32 0, i32 %code ; [#uses=1] + %2 = load i32, i32* %1, align 4 ; [#uses=2] + %3 = load i32, i32* @lineno, align 4 ; [#uses=1] + %4 = bitcast %struct.tree_node* %0 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=2] + %5 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 1 ; [#uses=1] + store i32 %3, i32* %5, align 4 + %6 = icmp sgt i32 %2, 0 ; [#uses=1] + br i1 %6, label %bb, label %bb3 + +bb: ; preds = %bb, %entry + %i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] ; [#uses=2] + %7 = load i8*, i8** %p, align 4 ; [#uses=2] + %8 = getelementptr i8, i8* %7, i32 4 ; [#uses=1] + store i8* %8, i8** %p, align 4 + %9 = bitcast i8* %7 to %struct.tree_node** ; <%struct.tree_node**> [#uses=1] + %10 = load %struct.tree_node*, %struct.tree_node** %9, align 4 ; <%struct.tree_node*> [#uses=1] + 
%11 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01 ; <%struct.tree_node**> [#uses=1] + store %struct.tree_node* %10, %struct.tree_node** %11, align 4 + %indvar.next = add i32 %i.01, 1 ; [#uses=2] + %exitcond = icmp eq i32 %indvar.next, %2 ; [#uses=1] + br i1 %exitcond, label %bb3, label %bb + +bb3: ; preds = %bb, %entry + call void @llvm.va_end(i8* %p1) + ret %struct.tree_node* %0 +} + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind + +declare fastcc %struct.tree_node* @make_node(i32) nounwind