diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -17166,7 +17166,7 @@ unsigned *Fast) const { if (!VT.isVector()) { if (Fast) - *Fast = 0; + *Fast = Subtarget.enableUnalignedScalarMem(); return Subtarget.enableUnalignedScalarMem(); } @@ -17183,7 +17183,7 @@ // misaligned accesses. TODO: Work through the codegen implications of // allowing such accesses to be formed, and considered fast. if (Fast) - *Fast = 0; + *Fast = Subtarget.enableUnalignedVectorMem(); return Subtarget.enableUnalignedVectorMem(); } diff --git a/llvm/test/CodeGen/RISCV/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/memcpy-inline.ll --- a/llvm/test/CodeGen/RISCV/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/memcpy-inline.ll @@ -21,49 +21,77 @@ @spool.splbuf = internal global [512 x i8] zeroinitializer, align 16 define i32 @t0() { -; RV32-BOTH-LABEL: t0: -; RV32-BOTH: # %bb.0: # %entry -; RV32-BOTH-NEXT: lui a0, %hi(src) -; RV32-BOTH-NEXT: lw a1, %lo(src)(a0) -; RV32-BOTH-NEXT: lui a2, %hi(dst) -; RV32-BOTH-NEXT: sw a1, %lo(dst)(a2) -; RV32-BOTH-NEXT: addi a0, a0, %lo(src) -; RV32-BOTH-NEXT: lbu a1, 10(a0) -; RV32-BOTH-NEXT: lh a3, 8(a0) -; RV32-BOTH-NEXT: lw a0, 4(a0) -; RV32-BOTH-NEXT: addi a2, a2, %lo(dst) -; RV32-BOTH-NEXT: sb a1, 10(a2) -; RV32-BOTH-NEXT: sh a3, 8(a2) -; RV32-BOTH-NEXT: sw a0, 4(a2) -; RV32-BOTH-NEXT: li a0, 0 -; RV32-BOTH-NEXT: ret +; RV32-LABEL: t0: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a0, %hi(src) +; RV32-NEXT: lw a1, %lo(src)(a0) +; RV32-NEXT: lui a2, %hi(dst) +; RV32-NEXT: sw a1, %lo(dst)(a2) +; RV32-NEXT: addi a0, a0, %lo(src) +; RV32-NEXT: lbu a1, 10(a0) +; RV32-NEXT: lh a3, 8(a0) +; RV32-NEXT: lw a0, 4(a0) +; RV32-NEXT: addi a2, a2, %lo(dst) +; RV32-NEXT: sb a1, 10(a2) +; RV32-NEXT: sh a3, 8(a2) +; RV32-NEXT: sw a0, 4(a2) +; RV32-NEXT: li a0, 0 +; RV32-NEXT: ret ; -; RV64-BOTH-LABEL: t0: -; RV64-BOTH: # %bb.0: # %entry -; RV64-BOTH-NEXT: lui a0, %hi(src) -; RV64-BOTH-NEXT: ld a1, %lo(src)(a0) -; RV64-BOTH-NEXT: lui a2, %hi(dst) -; RV64-BOTH-NEXT: addi a0, a0, %lo(src) -; RV64-BOTH-NEXT: lbu a3, 10(a0) -; RV64-BOTH-NEXT: lh a0, 8(a0) -; RV64-BOTH-NEXT: sd a1, %lo(dst)(a2) -; RV64-BOTH-NEXT: addi a1, a2, %lo(dst) -; RV64-BOTH-NEXT: sb a3, 10(a1) -; RV64-BOTH-NEXT: sh a0, 8(a1) -; RV64-BOTH-NEXT: li a0, 0 -; RV64-BOTH-NEXT: ret +; RV64-LABEL: t0: +; RV64: # %bb.0: # %entry +; RV64-NEXT: lui a0, %hi(src) +; RV64-NEXT: ld a1, %lo(src)(a0) +; RV64-NEXT: lui a2, %hi(dst) +; RV64-NEXT: addi a0, a0, %lo(src) +; RV64-NEXT: lbu a3, 10(a0) +; RV64-NEXT: lh a0, 8(a0) +; RV64-NEXT: sd a1, %lo(dst)(a2) +; RV64-NEXT: addi a1, a2, %lo(dst) +; RV64-NEXT: sb a3, 10(a1) +; RV64-NEXT: sh a0, 8(a1) +; RV64-NEXT: li a0, 0 +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: t0: +; RV32-FAST: # %bb.0: # %entry +; RV32-FAST-NEXT: lui a0, %hi(src) +; RV32-FAST-NEXT: lw a1, %lo(src)(a0) +; RV32-FAST-NEXT: lui a2, %hi(dst) +; RV32-FAST-NEXT: addi a0, a0, %lo(src) +; RV32-FAST-NEXT: lw a3, 7(a0) +; RV32-FAST-NEXT: lw a0, 4(a0) +; RV32-FAST-NEXT: sw a1, %lo(dst)(a2) +; RV32-FAST-NEXT: addi a1, a2, %lo(dst) +; RV32-FAST-NEXT: sw a3, 7(a1) +; RV32-FAST-NEXT: sw a0, 4(a1) +; RV32-FAST-NEXT: li a0, 0 +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: t0: +; RV64-FAST: # %bb.0: # %entry +; RV64-FAST-NEXT: lui a0, %hi(src) +; RV64-FAST-NEXT: ld a1, %lo(src)(a0) +; RV64-FAST-NEXT: addi a0, a0, %lo(src) +; RV64-FAST-NEXT: lw a0, 7(a0) +; RV64-FAST-NEXT: lui a2, %hi(dst) +; RV64-FAST-NEXT: sd a1, %lo(dst)(a2) +; RV64-FAST-NEXT: addi a1, a2, %lo(dst) +; RV64-FAST-NEXT: sw a0, 7(a1) +; RV64-FAST-NEXT: li a0, 0 +; RV64-FAST-NEXT: ret entry: call void @llvm.memcpy.p0.p0.i32(ptr align 8 @dst, ptr align 8 @src, i32 11, i1 false) ret i32 0 } define void @t1(ptr nocapture %C) nounwind { -; RV32-BOTH-LABEL: t1: -; RV32-BOTH: # %bb.0: # %entry -; RV32-BOTH-NEXT: lui a1, %hi(.L.str1) -; RV32-BOTH-NEXT: addi a1, a1, %lo(.L.str1) -; RV32-BOTH-NEXT: li a2, 31 -; RV32-BOTH-NEXT: tail memcpy@plt +; RV32-LABEL: t1: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, %hi(.L.str1) +; RV32-NEXT: addi a1, a1, %lo(.L.str1) +; RV32-NEXT: li a2, 31 +; RV32-NEXT: tail memcpy@plt ; ; RV64-LABEL: t1: ; RV64: # %bb.0: # %entry @@ -72,22 +100,45 @@ ; RV64-NEXT: li a2, 31 ; RV64-NEXT: tail memcpy@plt ; +; RV32-FAST-LABEL: t1: +; RV32-FAST: # %bb.0: # %entry +; RV32-FAST-NEXT: lui a1, 1141 +; RV32-FAST-NEXT: addi a1, a1, -439 +; RV32-FAST-NEXT: sw a1, 27(a0) +; RV32-FAST-NEXT: lui a1, 300325 +; RV32-FAST-NEXT: addi a1, a1, 1107 +; RV32-FAST-NEXT: sw a1, 24(a0) +; RV32-FAST-NEXT: lui a1, 132181 +; RV32-FAST-NEXT: addi a1, a1, -689 +; RV32-FAST-NEXT: sw a1, 20(a0) +; RV32-FAST-NEXT: lui a1, 340483 +; RV32-FAST-NEXT: addi a1, a1, -947 +; RV32-FAST-NEXT: sw a1, 16(a0) +; RV32-FAST-NEXT: lui a1, 267556 +; RV32-FAST-NEXT: addi a1, a1, 1871 +; RV32-FAST-NEXT: sw a1, 12(a0) +; RV32-FAST-NEXT: lui a1, 337154 +; RV32-FAST-NEXT: addi a1, a1, 69 +; RV32-FAST-NEXT: sw a1, 8(a0) +; RV32-FAST-NEXT: lui a1, 320757 +; RV32-FAST-NEXT: addi a1, a1, 1107 +; RV32-FAST-NEXT: sw a1, 4(a0) +; RV32-FAST-NEXT: lui a1, 365861 +; RV32-FAST-NEXT: addi a1, a1, -1980 +; RV32-FAST-NEXT: sw a1, 0(a0) +; RV32-FAST-NEXT: ret +; ; RV64-FAST-LABEL: t1: ; RV64-FAST: # %bb.0: # %entry ; RV64-FAST-NEXT: lui a1, %hi(.L.str1) ; RV64-FAST-NEXT: ld a2, %lo(.L.str1)(a1) -; RV64-FAST-NEXT: sd a2, 0(a0) -; RV64-FAST-NEXT: lui a2, 4 -; RV64-FAST-NEXT: addiw a2, a2, 1870 -; RV64-FAST-NEXT: sh a2, 28(a0) -; RV64-FAST-NEXT: lui a2, 300325 -; RV64-FAST-NEXT: addiw a2, a2, 1107 ; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str1) -; RV64-FAST-NEXT: ld a3, 16(a1) +; RV64-FAST-NEXT: ld a3, 23(a1) +; RV64-FAST-NEXT: ld a4, 16(a1) ; RV64-FAST-NEXT: ld a1, 8(a1) -; RV64-FAST-NEXT: sw a2, 24(a0) -; RV64-FAST-NEXT: sb zero, 30(a0) -; RV64-FAST-NEXT: sd a3, 16(a0) +; RV64-FAST-NEXT: sd a2, 0(a0) +; RV64-FAST-NEXT: sd a3, 23(a0) +; RV64-FAST-NEXT: sd a4, 16(a0) ; RV64-FAST-NEXT: sd a1, 8(a0) ; RV64-FAST-NEXT: ret entry: @@ -270,10 +321,9 @@ ; ; RV32-FAST-LABEL: t5: ; RV32-FAST: # %bb.0: # %entry -; RV32-FAST-NEXT: sb zero, 6(a0) -; RV32-FAST-NEXT: lui a1, 5 -; RV32-FAST-NEXT: addi a1, a1, 1107 -; RV32-FAST-NEXT: sh a1, 4(a0) +; RV32-FAST-NEXT: lui a1, 1349 +; RV32-FAST-NEXT: addi a1, a1, 857 +; RV32-FAST-NEXT: sw a1, 3(a0) ; RV32-FAST-NEXT: lui a1, 365861 ; RV32-FAST-NEXT: addi a1, a1, -1980 ; RV32-FAST-NEXT: sw a1, 0(a0) @@ -281,10 +331,9 @@ ; ; RV64-FAST-LABEL: t5: ; RV64-FAST: # %bb.0: # %entry -; RV64-FAST-NEXT: sb zero, 6(a0) -; RV64-FAST-NEXT: lui a1, 5 -; RV64-FAST-NEXT: addiw a1, a1, 1107 -; RV64-FAST-NEXT: sh a1, 4(a0) +; RV64-FAST-NEXT: lui a1, 1349 +; RV64-FAST-NEXT: addiw a1, a1, 857 +; RV64-FAST-NEXT: sw a1, 3(a0) ; RV64-FAST-NEXT: lui a1, 365861 ; RV64-FAST-NEXT: addiw a1, a1, -1980 ; RV64-FAST-NEXT: sw a1, 0(a0) @@ -342,14 +391,12 @@ ; RV64-FAST-LABEL: t6: ; RV64-FAST: # %bb.0: # %entry ; RV64-FAST-NEXT: lui a0, %hi(.L.str6) -; RV64-FAST-NEXT: ld a0, %lo(.L.str6)(a0) -; RV64-FAST-NEXT: lui a1, %hi(spool.splbuf) -; RV64-FAST-NEXT: li a2, 88 -; RV64-FAST-NEXT: sh a2, %lo(spool.splbuf+12)(a1) -; RV64-FAST-NEXT: sd a0, %lo(spool.splbuf)(a1) -; RV64-FAST-NEXT: lui a0, 361862 -; RV64-FAST-NEXT: addiw a0, a0, -1960 -; RV64-FAST-NEXT: sw a0, %lo(spool.splbuf+8)(a1) +; RV64-FAST-NEXT: ld a1, %lo(.L.str6)(a0) +; RV64-FAST-NEXT: addi a0, a0, %lo(.L.str6) +; RV64-FAST-NEXT: ld a0, 6(a0) +; RV64-FAST-NEXT: lui a2, %hi(spool.splbuf) +; RV64-FAST-NEXT: sd a1, %lo(spool.splbuf)(a2) +; RV64-FAST-NEXT: sd a0, %lo(spool.splbuf+6)(a2) ; RV64-FAST-NEXT: ret entry: call void @llvm.memcpy.p0.p0.i64(ptr @spool.splbuf, ptr @.str6, i64 14, i1 false) @@ -397,3 +444,5 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV64-BOTH: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/memset-inline.ll b/llvm/test/CodeGen/RISCV/memset-inline.ll --- a/llvm/test/CodeGen/RISCV/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/memset-inline.ll @@ -1248,40 +1248,66 @@ ; Usual overlap tricks define void @aligned_bzero_7(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_7: -; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sb zero, 6(a0) -; RV32-BOTH-NEXT: sh zero, 4(a0) -; RV32-BOTH-NEXT: sw zero, 0(a0) -; RV32-BOTH-NEXT: ret +; RV32-LABEL: aligned_bzero_7: +; RV32: # %bb.0: +; RV32-NEXT: sb zero, 6(a0) +; RV32-NEXT: sh zero, 4(a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_7: -; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: sb zero, 6(a0) -; RV64-BOTH-NEXT: sh zero, 4(a0) -; RV64-BOTH-NEXT: sw zero, 0(a0) -; RV64-BOTH-NEXT: ret +; RV64-LABEL: aligned_bzero_7: +; RV64: # %bb.0: +; RV64-NEXT: sb zero, 6(a0) +; RV64-NEXT: sh zero, 4(a0) +; RV64-NEXT: sw zero, 0(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: aligned_bzero_7: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: sw zero, 3(a0) +; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: aligned_bzero_7: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sw zero, 3(a0) +; RV64-FAST-NEXT: sw zero, 0(a0) +; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 8 %a, i8 0, i64 7, i1 0) ret void } define void @aligned_bzero_15(ptr %a) nounwind { -; RV32-BOTH-LABEL: aligned_bzero_15: -; RV32-BOTH: # %bb.0: -; RV32-BOTH-NEXT: sb zero, 14(a0) -; RV32-BOTH-NEXT: sh zero, 12(a0) -; RV32-BOTH-NEXT: sw zero, 8(a0) -; RV32-BOTH-NEXT: sw zero, 4(a0) -; RV32-BOTH-NEXT: sw zero, 0(a0) -; RV32-BOTH-NEXT: ret +; RV32-LABEL: aligned_bzero_15: +; RV32: # %bb.0: +; RV32-NEXT: sb zero, 14(a0) +; RV32-NEXT: sh zero, 12(a0) +; RV32-NEXT: sw zero, 8(a0) +; RV32-NEXT: sw zero, 4(a0) +; RV32-NEXT: sw zero, 0(a0) +; RV32-NEXT: ret ; -; RV64-BOTH-LABEL: aligned_bzero_15: -; RV64-BOTH: # %bb.0: -; RV64-BOTH-NEXT: sb zero, 14(a0) -; RV64-BOTH-NEXT: sh zero, 12(a0) -; RV64-BOTH-NEXT: sw zero, 8(a0) -; RV64-BOTH-NEXT: sd zero, 0(a0) -; RV64-BOTH-NEXT: ret +; RV64-LABEL: aligned_bzero_15: +; RV64: # %bb.0: +; RV64-NEXT: sb zero, 14(a0) +; RV64-NEXT: sh zero, 12(a0) +; RV64-NEXT: sw zero, 8(a0) +; RV64-NEXT: sd zero, 0(a0) +; RV64-NEXT: ret +; +; RV32-FAST-LABEL: aligned_bzero_15: +; RV32-FAST: # %bb.0: +; RV32-FAST-NEXT: sw zero, 11(a0) +; RV32-FAST-NEXT: sw zero, 8(a0) +; RV32-FAST-NEXT: sw zero, 4(a0) +; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: ret +; +; RV64-FAST-LABEL: aligned_bzero_15: +; RV64-FAST: # %bb.0: +; RV64-FAST-NEXT: sd zero, 7(a0) +; RV64-FAST-NEXT: sd zero, 0(a0) +; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr align 8 %a, i8 0, i64 15, i1 0) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -836,10 +836,9 @@ ; ; RV32-FAST-LABEL: bzero_16: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 12(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) -; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-FAST-NEXT: vmv.v.i v8, 0 +; RV32-FAST-NEXT: vse32.v v8, (a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_16: @@ -926,22 +925,16 @@ ; ; RV32-FAST-LABEL: bzero_32: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 28(a0) -; RV32-FAST-NEXT: sw zero, 24(a0) -; RV32-FAST-NEXT: sw zero, 20(a0) -; RV32-FAST-NEXT: sw zero, 16(a0) -; RV32-FAST-NEXT: sw zero, 12(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) -; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-FAST-NEXT: vmv.v.i v8, 0 +; RV32-FAST-NEXT: vse32.v v8, (a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_32: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: sd zero, 24(a0) -; RV64-FAST-NEXT: sd zero, 16(a0) -; RV64-FAST-NEXT: sd zero, 8(a0) -; RV64-FAST-NEXT: sd zero, 0(a0) +; RV64-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-FAST-NEXT: vmv.v.i v8, 0 +; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0) ret void @@ -1086,34 +1079,16 @@ ; ; RV32-FAST-LABEL: bzero_64: ; RV32-FAST: # %bb.0: -; RV32-FAST-NEXT: sw zero, 60(a0) -; RV32-FAST-NEXT: sw zero, 56(a0) -; RV32-FAST-NEXT: sw zero, 52(a0) -; RV32-FAST-NEXT: sw zero, 48(a0) -; RV32-FAST-NEXT: sw zero, 44(a0) -; RV32-FAST-NEXT: sw zero, 40(a0) -; RV32-FAST-NEXT: sw zero, 36(a0) -; RV32-FAST-NEXT: sw zero, 32(a0) -; RV32-FAST-NEXT: sw zero, 28(a0) -; RV32-FAST-NEXT: sw zero, 24(a0) -; RV32-FAST-NEXT: sw zero, 20(a0) -; RV32-FAST-NEXT: sw zero, 16(a0) -; RV32-FAST-NEXT: sw zero, 12(a0) -; RV32-FAST-NEXT: sw zero, 8(a0) -; RV32-FAST-NEXT: sw zero, 4(a0) -; RV32-FAST-NEXT: sw zero, 0(a0) +; RV32-FAST-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-FAST-NEXT: vmv.v.i v8, 0 +; RV32-FAST-NEXT: vse32.v v8, (a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: bzero_64: ; RV64-FAST: # %bb.0: -; RV64-FAST-NEXT: sd zero, 56(a0) -; RV64-FAST-NEXT: sd zero, 48(a0) -; RV64-FAST-NEXT: sd zero, 40(a0) -; RV64-FAST-NEXT: sd zero, 32(a0) -; RV64-FAST-NEXT: sd zero, 24(a0) -; RV64-FAST-NEXT: sd zero, 16(a0) -; RV64-FAST-NEXT: sd zero, 8(a0) -; RV64-FAST-NEXT: sd zero, 0(a0) +; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-FAST-NEXT: vmv.v.i v8, 0 +; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0) ret void diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -267,11 +267,16 @@ } define void @merge_stores_i8_i16(ptr %p) { -; ALL-LABEL: merge_stores_i8_i16: -; ALL: # %bb.0: -; ALL-NEXT: sb zero, 0(a0) -; ALL-NEXT: sb zero, 1(a0) -; ALL-NEXT: ret +; SLOW-LABEL: merge_stores_i8_i16: +; SLOW: # %bb.0: +; SLOW-NEXT: sb zero, 0(a0) +; SLOW-NEXT: sb zero, 1(a0) +; SLOW-NEXT: ret +; +; FAST-LABEL: merge_stores_i8_i16: +; FAST: # %bb.0: +; FAST-NEXT: sh zero, 0(a0) +; FAST-NEXT: ret store i8 0, ptr %p %p2 = getelementptr i8, ptr %p, i32 1 store i8 0, ptr %p2 @@ -279,13 +284,18 @@ } define void @merge_stores_i8_i32(ptr %p) { -; ALL-LABEL: merge_stores_i8_i32: -; ALL: # %bb.0: -; ALL-NEXT: sb zero, 0(a0) -; ALL-NEXT: sb zero, 1(a0) -; ALL-NEXT: sb zero, 2(a0) -; ALL-NEXT: sb zero, 3(a0) -; ALL-NEXT: ret +; SLOW-LABEL: merge_stores_i8_i32: +; SLOW: # %bb.0: +; SLOW-NEXT: sb zero, 0(a0) +; SLOW-NEXT: sb zero, 1(a0) +; SLOW-NEXT: sb zero, 2(a0) +; SLOW-NEXT: sb zero, 3(a0) +; SLOW-NEXT: ret +; +; FAST-LABEL: merge_stores_i8_i32: +; FAST: # %bb.0: +; FAST-NEXT: sw zero, 0(a0) +; FAST-NEXT: ret store i8 0, ptr %p %p2 = getelementptr i8, ptr %p, i32 1 store i8 0, ptr %p2 @@ -297,17 +307,28 @@ } define void @merge_stores_i8_i64(ptr %p) { -; ALL-LABEL: merge_stores_i8_i64: -; ALL: # %bb.0: -; ALL-NEXT: sb zero, 0(a0) -; ALL-NEXT: sb zero, 1(a0) -; ALL-NEXT: sb zero, 2(a0) -; ALL-NEXT: sb zero, 3(a0) -; ALL-NEXT: sb zero, 4(a0) -; ALL-NEXT: sb zero, 5(a0) -; ALL-NEXT: sb zero, 6(a0) -; ALL-NEXT: sb zero, 7(a0) -; ALL-NEXT: ret +; SLOW-LABEL: merge_stores_i8_i64: +; SLOW: # %bb.0: +; SLOW-NEXT: sb zero, 0(a0) +; SLOW-NEXT: sb zero, 1(a0) +; SLOW-NEXT: sb zero, 2(a0) +; SLOW-NEXT: sb zero, 3(a0) +; SLOW-NEXT: sb zero, 4(a0) +; SLOW-NEXT: sb zero, 5(a0) +; SLOW-NEXT: sb zero, 6(a0) +; SLOW-NEXT: sb zero, 7(a0) +; SLOW-NEXT: ret +; +; RV32I-FAST-LABEL: merge_stores_i8_i64: +; RV32I-FAST: # %bb.0: +; RV32I-FAST-NEXT: sw zero, 0(a0) +; RV32I-FAST-NEXT: sw zero, 4(a0) +; RV32I-FAST-NEXT: ret +; +; RV64I-FAST-LABEL: merge_stores_i8_i64: +; RV64I-FAST: # %bb.0: +; RV64I-FAST-NEXT: sd zero, 0(a0) +; RV64I-FAST-NEXT: ret store i8 0, ptr %p %p2 = getelementptr i8, ptr %p, i32 1 store i8 0, ptr %p2 @@ -327,11 +348,16 @@ } define void @merge_stores_i16_i32(ptr %p) { -; ALL-LABEL: merge_stores_i16_i32: -; ALL: # %bb.0: -; ALL-NEXT: sh zero, 0(a0) -; ALL-NEXT: sh zero, 2(a0) -; ALL-NEXT: ret +; SLOW-LABEL: merge_stores_i16_i32: +; SLOW: # %bb.0: +; SLOW-NEXT: sh zero, 0(a0) +; SLOW-NEXT: sh zero, 2(a0) +; SLOW-NEXT: ret +; +; FAST-LABEL: merge_stores_i16_i32: +; FAST: # %bb.0: +; FAST-NEXT: sw zero, 0(a0) +; FAST-NEXT: ret store i16 0, ptr %p %p2 = getelementptr i16, ptr %p, i32 1 store i16 0, ptr %p2 @@ -339,13 +365,24 @@ } define void @merge_stores_i16_i64(ptr %p) { -; ALL-LABEL: merge_stores_i16_i64: -; ALL: # %bb.0: -; ALL-NEXT: sh zero, 0(a0) -; ALL-NEXT: sh zero, 2(a0) -; ALL-NEXT: sh zero, 4(a0) -; ALL-NEXT: sh zero, 6(a0) -; ALL-NEXT: ret +; SLOW-LABEL: merge_stores_i16_i64: +; SLOW: # %bb.0: +; SLOW-NEXT: sh zero, 0(a0) +; SLOW-NEXT: sh zero, 2(a0) +; SLOW-NEXT: sh zero, 4(a0) +; SLOW-NEXT: sh zero, 6(a0) +; SLOW-NEXT: ret +; +; RV32I-FAST-LABEL: merge_stores_i16_i64: +; RV32I-FAST: # %bb.0: +; RV32I-FAST-NEXT: sw zero, 0(a0) +; RV32I-FAST-NEXT: sw zero, 4(a0) +; RV32I-FAST-NEXT: ret +; +; RV64I-FAST-LABEL: merge_stores_i16_i64: +; RV64I-FAST: # %bb.0: +; RV64I-FAST-NEXT: sd zero, 0(a0) +; RV64I-FAST-NEXT: ret store i16 0, ptr %p %p2 = getelementptr i16, ptr %p, i32 1 store i16 0, ptr %p2 @@ -357,11 +394,22 @@ } define void @merge_stores_i32_i64(ptr %p) { -; ALL-LABEL: merge_stores_i32_i64: -; ALL: # %bb.0: -; ALL-NEXT: sw zero, 0(a0) -; ALL-NEXT: sw zero, 4(a0) -; ALL-NEXT: ret +; SLOW-LABEL: merge_stores_i32_i64: +; SLOW: # %bb.0: +; SLOW-NEXT: sw zero, 0(a0) +; SLOW-NEXT: sw zero, 4(a0) +; SLOW-NEXT: ret +; +; RV32I-FAST-LABEL: merge_stores_i32_i64: +; RV32I-FAST: # %bb.0: +; RV32I-FAST-NEXT: sw zero, 0(a0) +; RV32I-FAST-NEXT: sw zero, 4(a0) +; RV32I-FAST-NEXT: ret +; +; RV64I-FAST-LABEL: merge_stores_i32_i64: +; RV64I-FAST: # %bb.0: +; RV64I-FAST-NEXT: sd zero, 0(a0) +; RV64I-FAST-NEXT: ret store i32 0, ptr %p %p2 = getelementptr i32, ptr %p, i32 1 store i32 0, ptr %p2