Index: test/CodeGen/AArch64/SVE/shifted_identity_loop.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/SVE/shifted_identity_loop.ll
@@ -0,0 +1,129 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;; Verifies the SVE code generated for a hand-vectorized "a[i] = i << 2" loop:
+;; the vector body advances a stepvector induction variable by the number of
+;; i32 elements per vector (vscale * 4) and shifts it left by a splat of 2.
+;; NOTE(review): scalable vector types are spelled <n x 4 x i32> below; confirm
+;; this matches the scalable-vector syntax accepted by the tree under test.
+
+; ModuleID = 'ShiftedIdentityLoop.c'
+source_filename = "ShiftedIdentityLoop.c"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+;; Generated from the following C code
+; void shifted_id_loop(int *a, int count) {
+;   for (int i = 0; i < count; ++i)
+;     a[i] = i << 2;
+; }
+
+define void @shifted_id_loop(i32* nocapture %a, i32 %count) {
+; CHECK-LABEL: shifted_id_loop:
+entry:
+  %cmp5 = icmp sgt i32 %count, 0
+  br i1 %cmp5, label %for.body.lr.ph, label %for.cond.cleanup
+
+; CHECK-LABEL: // %for.body.lr.ph
+; CHECK: orr w[[SINGLEVEC:[0-9]+]], wzr, #0x1
+; CHECK: rdvl x[[RDVLMIN:[0-9]+]], w[[SINGLEVEC]]
+; CHECK: lsl x[[NUMELTSMIN:[0-9]+]], x[[RDVLMIN]], #2
+; CHECK: cmp x[[NUMELTSMIN]], x{{[0-9]+}}
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %count to i64
+  %0 = call i64 @llvm.experimental.vector.vscale.i64()
+  %1 = shl i64 %0, 2
+  ;; Use the scalar loop when one vector holds more elements than the trip count.
+  %min.iters.check = icmp ugt i64 %1, %wide.trip.count
+  br i1 %min.iters.check, label %for.body.preheader, label %min.iters.checked
+
+for.body.preheader:
+  %indvars.iv.ph = phi i64 [ 0, %min.iters.checked ],
+                           [ 0, %for.body.lr.ph ],
+                           [ %n.vec, %middle.block ]
+  br label %for.body
+
+min.iters.checked:
+  ;; %n.vec is the trip count rounded down to a multiple of %1.
+  %n.mod.vf = urem i64 %wide.trip.count, %1
+  %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
+  %cmp.zero = icmp eq i64 %n.vec, 0
+  br i1 %cmp.zero, label %for.body.preheader, label %vector.ph
+
+; CHECK-LABEL: // %vector.ph
+; CHECK: orr w[[STEP:[0-9]+]], wzr, #0x1
+; CHECK: index [[DATA:z[0-9]+]].s, wzr, w[[STEP]]
+; CHECK: rdvl x[[RDVL:[0-9]+]], w[[STEP]]
+; CHECK-DAG: orr w[[SHIFTAMT:[0-9]+]], wzr, #0x2
+; CHECK-DAG: lsl w[[NUMELTS:[0-9]+]], w[[RDVL]], #2
+; CHECK-DAG: lsl x[[PTRINC:[0-9]+]], x[[RDVL]], #4
+; CHECK: mov [[NUMELTSSPLAT:z[0-9]+]].s, w[[NUMELTS]]
+; CHECK: mov [[SHIFTAMTSPLAT:z[0-9]+]].s, w[[SHIFTAMT]]
+; CHECK: mov x[[OFFSET:[0-9]+]], x0
+vector.ph:
+  ;; Stepvector used for the identity value of the element
+  %2 = call <n x 4 x i32> @llvm.experimental.vector.stepvector.nxv4i32()
+  ;; i64 vscale used for addressing.
+  ;; vscale * 4 == number of elements in a vector
+  %3 = call i64 @llvm.experimental.vector.vscale.i64()
+  %4 = shl i64 %3, 2
+  ;; Create a splat of the number of elements in a vector
+  %5 = call i32 @llvm.experimental.vector.vscale.i32()
+  %6 = shl i32 %5, 2
+  %.splatinsert8 = insertelement <n x 4 x i32> undef, i32 %6, i32 0
+  %.splat9 = shufflevector <n x 4 x i32> %.splatinsert8,
+                           <n x 4 x i32> undef,
+                           <n x 4 x i32> zeroinitializer
+  br label %vector.body
+
+; CHECK-LABEL: // %vector.body
+; CHECK: ptrue [[PG:p[0-9]+]].s
+; CHECK: add [[NEXTDATA:z[0-9]+]].s, [[DATA]].s, [[NUMELTSSPLAT]].s
+; CHECK: lsl [[STOREDATA:z[0-9]+]].s, [[PG]]/m, [[DATA]].s, [[SHIFTAMTSPLAT]].s
+; CHECK: ptrue [[PSTORE:p[0-9]+]].s
+; CHECK: st1w { [[STOREDATA]].s }, [[PSTORE]], [x[[OFFSET]]]
+; CHECK: add x[[OFFSET]], x[[OFFSET]], x[[PTRINC]]
+; CHECK: mov [[DATA]].d, [[NEXTDATA]].d
+vector.body:
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %7 = phi i64 [ 0, %vector.ph ], [ %8, %vector.body ]
+  %vec.ind10 = phi <n x 4 x i32> [ %2, %vector.ph ], [ %step.add11, %vector.body ]
+  %8 = add i64 %7, %4
+  %step.add11 = add <n x 4 x i32> %vec.ind10, %.splat9
+  ;; shift the element id numbers by 2
+  %insert = insertelement <n x 4 x i32> undef, i32 2, i32 0
+  %splat = shufflevector <n x 4 x i32> %insert,
+                         <n x 4 x i32> undef,
+                         <n x 4 x i32> zeroinitializer
+  %9 = shl nsw <n x 4 x i32> %vec.ind10, %splat
+  ;; %7 is the element offset of this iteration's store; it advances by %4.
+  %10 = getelementptr inbounds i32, i32* %a, i64 %7
+  %11 = bitcast i32* %10 to <n x 4 x i32>*
+  store <n x 4 x i32> %9, <n x 4 x i32>* %11, align 4
+  %index.next = add i64 %index, %1
+  %12 = icmp eq i64 %index.next, %n.vec
+  br i1 %12, label %middle.block, label %vector.body
+
+middle.block:
+  ;; Skip the scalar loop when the vector loop covered every element.
+  %cmp.n = icmp eq i64 %n.mod.vf, 0
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
+
+for.cond.cleanup:                                 ; preds = %for.body, %middle.block, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  ;; Scalar loop: one a[i] = i << 2 store per iteration.
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
+  %13 = trunc i64 %indvars.iv to i32
+  %mul = shl nsw i32 %13, 2
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %mul, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+declare i64 @llvm.experimental.vector.vscale.i64()
+declare i32 @llvm.experimental.vector.vscale.i32()
+declare <n x 4 x i32> @llvm.experimental.vector.stepvector.nxv4i32()
+