# Patch notes (free text ahead of the "Index:" header; not part of any hunk).
#
# Appends two functions to the end of llvm/test/CodeGen/AArch64/trunc-to-tbl.ll:
#
#   @trunc_v16i64_to_v16i8_in_loop
#       loop body: load <16 x i64> from %A[%iv], trunc to <16 x i8>,
#       store to %dst[%iv].
#   @trunc_v8i64_to_v8i8_in_loop
#       loop body: load <8 x i64> from %A[%iv], trunc to <8 x i8>,
#       store to %dst[%iv].
#
# Expected assembly recorded in the check lines:
#   * 16-element case: four uzp1 on .4s, two uzp1 on .8h, one uzp1 on .16b,
#     then a 16-byte store (str q0 / st1 { v0.16b }).
#   * 8-element case: two uzp1 on .4s, one uzp1 on .8h, one xtn to .8b,
#     then an 8-byte store (str d0 / st1 { v0.8b }).
#   * No tbl instruction appears in the expected output of either function.
#   * The CHECK prefix expects ldp loads, "_"-prefixed symbols and "LBBn_m"
#     labels; the CHECK-BE prefix expects ld1/st1 accesses, unprefixed symbols
#     and ".LBBn_m" labels.
#     NOTE(review): the RUN lines that define these prefixes are outside this
#     hunk; presumably CHECK is a little-endian Darwin run and CHECK-BE a
#     big-endian run -- confirm against the top of the test file.
#
# Loop shape, as written in the IR:
#   %ec = icmp eq i64 %iv.next, 1000
#   br i1 %ec, label %loop, label %exit
#   The back-edge to %loop is taken only when %iv.next == 1000, so starting
#   from %iv = 0 the first evaluation of %ec is false and control goes to
#   %exit. The expected assembly encodes the same condition
#   (cmp x8, #1000 followed by b.eq to the loop label).
#   NOTE(review): if a 1000-iteration loop was intended, the branch targets
#   would have to be swapped and every check line regenerated -- confirm the
#   intent before changing anything, since the IR and the check lines
#   currently agree with each other.
#
# Hunk accounting: 3 context lines (the tail of the preceding function) plus
# 146 added lines, matching the "+234,149" count in the hunk header.
#
Index: llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
===================================================================
--- llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -234,3 +234,149 @@
 exit:
   ret void
 }
+
+define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
+
+; CHECK-LABEL: _trunc_v16i64_to_v16i8_in_loop:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: LBB3_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x9, x0, x8, lsl #7
+; CHECK-NEXT: ldp q3, q2, [x9, #96]
+; CHECK-NEXT: ldp q1, q0, [x9, #32]
+; CHECK-NEXT: uzp1.4s v2, v3, v2
+; CHECK-NEXT: ldp q5, q4, [x9, #64]
+; CHECK-NEXT: uzp1.4s v0, v1, v0
+; CHECK-NEXT: ldp q3, q6, [x9]
+; CHECK-NEXT: uzp1.4s v4, v5, v4
+; CHECK-NEXT: uzp1.8h v2, v4, v2
+; CHECK-NEXT: uzp1.4s v1, v3, v6
+; CHECK-NEXT: uzp1.8h v0, v1, v0
+; CHECK-NEXT: uzp1.16b v0, v0, v2
+; CHECK-NEXT: str q0, [x1, x8, lsl #4]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: b.eq LBB3_1
+; CHECK-NEXT: ; %bb.2: ; %exit
+; CHECK-NEXT: ret
+
+
+; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB3_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8, lsl #7
+; CHECK-BE-NEXT: add x10, x9, #48
+; CHECK-BE-NEXT: add x11, x9, #32
+; CHECK-BE-NEXT: ld1 { v5.2d }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.2d }, [x10]
+; CHECK-BE-NEXT: add x10, x9, #80
+; CHECK-BE-NEXT: ld1 { v1.2d }, [x11]
+; CHECK-BE-NEXT: add x11, x9, #112
+; CHECK-BE-NEXT: ld1 { v2.2d }, [x10]
+; CHECK-BE-NEXT: add x10, x9, #96
+; CHECK-BE-NEXT: ld1 { v3.2d }, [x11]
+; CHECK-BE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; CHECK-BE-NEXT: ld1 { v4.2d }, [x10]
+; CHECK-BE-NEXT: add x10, x9, #64
+; CHECK-BE-NEXT: add x9, x9, #16
+; CHECK-BE-NEXT: ld1 { v6.2d }, [x10]
+; CHECK-BE-NEXT: ld1 { v7.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
+; CHECK-BE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
+; CHECK-BE-NEXT: add x8, x8, #1
+; CHECK-BE-NEXT: cmp x8, #1000
+; CHECK-BE-NEXT: uzp1 v2.4s, v6.4s, v2.4s
+; CHECK-BE-NEXT: uzp1 v1.4s, v5.4s, v7.4s
+; CHECK-BE-NEXT: uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: b.eq .LBB3_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
+
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds <16 x i64>, ptr %A, i64 %iv
+  %l.A = load <16 x i64>, ptr %gep.A
+  %trunc = trunc <16 x i64> %l.A to <16 x i8>
+  %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
+  store <16 x i8> %trunc, ptr %gep.dst
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 1000
+  br i1 %ec, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
+
+; CHECK-LABEL: _trunc_v8i64_to_v8i8_in_loop:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: LBB4_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x9, x0, x8, lsl #6
+; CHECK-NEXT: ldp q1, q0, [x9, #32]
+; CHECK-NEXT: ldp q3, q2, [x9]
+; CHECK-NEXT: uzp1.4s v0, v1, v0
+; CHECK-NEXT: uzp1.4s v1, v3, v2
+; CHECK-NEXT: uzp1.8h v0, v1, v0
+; CHECK-NEXT: xtn.8b v0, v0
+; CHECK-NEXT: str d0, [x1, x8, lsl #3]
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: b.eq LBB4_1
+; CHECK-NEXT: ; %bb.2: ; %exit
+; CHECK-NEXT: ret
+
+; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop:
+; CHECK-BE: // %bb.0: // %entry
+; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: .LBB4_1: // %loop
+; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
+; CHECK-BE-NEXT: add x10, x9, #48
+; CHECK-BE-NEXT: ld1 { v1.2d }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.2d }, [x10]
+; CHECK-BE-NEXT: add x10, x9, #32
+; CHECK-BE-NEXT: add x9, x9, #16
+; CHECK-BE-NEXT: ld1 { v2.2d }, [x10]
+; CHECK-BE-NEXT: ld1 { v3.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
+; CHECK-BE-NEXT: add x8, x8, #1
+; CHECK-BE-NEXT: cmp x8, #1000
+; CHECK-BE-NEXT: uzp1 v0.4s, v2.4s, v0.4s
+; CHECK-BE-NEXT: uzp1 v1.4s, v1.4s, v3.4s
+; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; CHECK-BE-NEXT: xtn v0.8b, v0.8h
+; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: b.eq .LBB4_1
+; CHECK-BE-NEXT: // %bb.2: // %exit
+; CHECK-BE-NEXT: ret
+
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A = getelementptr inbounds <8 x i64>, ptr %A, i64 %iv
+  %l.A = load <8 x i64>, ptr %gep.A
+  %trunc = trunc <8 x i64> %l.A to <8 x i8>
+  %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
+  store <8 x i8> %trunc, ptr %gep.dst
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 1000
+  br i1 %ec, label %loop, label %exit
+
+exit:
+  ret void
+}