diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td --- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td +++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td @@ -30,3 +30,51 @@ let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : GCCBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : GCCBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : GCCBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : GCCBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, 
LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : GCCBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : GCCBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : GCCBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : GCCBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : GCCBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : GCCBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, 
LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : GCCBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : GCCBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, 
LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], 
[LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td --- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td @@ -62,3 +62,99 @@ def : Pat<(int_ve_vl_vldl2dzxnc_vssvl i64:$sy, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCrrl_v i64:$sy, i64:$sz, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_vldl2dzxnc_vssl simm7:$I, i64:$sz, i32:$vl), (VLDL2DZXNCirl 
(LO7 $I), i64:$sz, i32:$vl)>; def : Pat<(int_ve_vl_vldl2dzxnc_vssvl simm7:$I, i64:$sz, v256f64:$pt, i32:$vl), (VLDL2DZXNCirl_v (LO7 $I), i64:$sz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : 
Pat<(int_ve_vl_vstncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstunc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : 
Pat<(int_ve_vl_vstuot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTUNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTUNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstuncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTUNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def 
: Pat<(int_ve_vl_vstlot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTLNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTLNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstlncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTLNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, 
i32:$vl)>; +def : Pat<(int_ve_vl_vst2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VST2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VST2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vst2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VST2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), 
(VSTU2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTU2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTU2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstu2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTU2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2Drrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2Dirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssml 
v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Drrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2d_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2Dirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dnc_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, i64:$sy, i64:$sz, i32:$vl), (VSTL2DNCOTrrvl i64:$sy, i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssl v256f64:$vx, simm7:$I, i64:$sz, i32:$vl), (VSTL2DNCOTirvl (LO7 $I), i64:$sz, v256f64:$vx, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, i64:$sy, i64:$sz, v256i1:$vm, i32:$vl), (VSTL2DNCOTrrvml i64:$sy, i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vstl2dncot_vssml v256f64:$vx, simm7:$I, i64:$sz, v256i1:$vm, i32:$vl), 
(VSTL2DNCOTirvml (LO7 $I), i64:$sz, v256f64:$vx, v256i1:$vm, i32:$vl)>; diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll @@ -0,0 +1,1401 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test vector store intrinsic instructions +;;; +;;; Note: +;;; We test VST*rrvl, VST*rrvml, VST*irvl, and VST*irvml instructions. + +; Function Attrs: nounwind +define void @vst_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vst_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind readonly +declare <256 x double> @llvm.ve.vl.vld.vssl(i64, i8*, i32) + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vst_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vst_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vst_vssl_imm(i8* %0) { +; CHECK-LABEL: vst_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 
256) + tail call void @llvm.ve.vl.vst.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst_vssml_imm(i8* %0) { +; CHECK-LABEL: vst_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst.nc %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstnc.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstnc.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstnc_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstnc_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstnc.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vstnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst.nc %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, 
i8* %0, i32 256) + tail call void @llvm.ve.vl.vstnc.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstnc_vssml_imm(i8* %0) { +; CHECK-LABEL: vstnc_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> 
@llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstncot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstncot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst.nc.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstncot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstncot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstncot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstncot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstncot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstncot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstncot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst.nc.ot %v0, 8, %s0 +; CHECK-NEXT: 
or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstncot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstncot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstncot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstu_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstu_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstu_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstu_vssl_imm(i8* %0) { +; CHECK-LABEL: vstu_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu %v0, 8, 
%s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu_vssml_imm(i8* %0) { +; CHECK-LABEL: vstu_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstunc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstunc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu.nc %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstunc.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstunc.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstunc_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstunc_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstunc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstunc.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstunc_vssl_imm(i8* %0) { +; CHECK-LABEL: vstunc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 
+; CHECK-NEXT: vstu.nc %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstunc.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstunc_vssml_imm(i8* %0) { +; CHECK-LABEL: vstunc_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstunc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstuot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstuot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstuot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstuot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstuot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstuot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstuot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstuot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: 
lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstuot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstuot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstuncot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstuncot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu.nc.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuncot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstuncot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstuncot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstuncot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstuncot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstuncot_vssl_imm(i8* %0) { +; 
CHECK-LABEL: vstuncot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu.nc.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuncot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstuncot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstuncot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstuncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstl_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstl_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstl_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define 
void @vstl_vssl_imm(i8* %0) { +; CHECK-LABEL: vstl_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl_vssml_imm(i8* %0) { +; CHECK-LABEL: vstl_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstlnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstlnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl.nc %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlnc.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstlnc.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstlnc_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstlnc_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstlnc.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function 
Attrs: nounwind +define void @vstlnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vstlnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl.nc %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlnc.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstlnc_vssml_imm(i8* %0) { +; CHECK-LABEL: vstlnc_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstlot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstlot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstlot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstlot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstlot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstlot.vssml(<256 x double>, 
i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstlot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstlot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstlot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstlot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstlncot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstlncot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl.nc.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlncot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstlncot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstlncot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstlncot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind 
writeonly +declare void @llvm.ve.vl.vstlncot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstlncot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstlncot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl.nc.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlncot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstlncot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstlncot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstlncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2d_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vst2d_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2d.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2d.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vst2d_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vst2d_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2d.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, 
i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2d.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vst2d_vssl_imm(i8* %0) { +; CHECK-LABEL: vst2d_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2d.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2d_vssml_imm(i8* %0) { +; CHECK-LABEL: vst2d_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2d.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2dnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vst2dnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d.nc %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dnc.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2dnc.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vst2dnc_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vst2dnc_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dnc.vssml(<256 x 
double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2dnc.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vst2dnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vst2dnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d.nc %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dnc.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2dnc_vssml_imm(i8* %0) { +; CHECK-LABEL: vst2dnc_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2dot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vst2dot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2dot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vst2dot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vst2dot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, 
i32 256) + tail call void @llvm.ve.vl.vst2dot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2dot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vst2dot_vssl_imm(i8* %0) { +; CHECK-LABEL: vst2dot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2dot_vssml_imm(i8* %0) { +; CHECK-LABEL: vst2dot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2dncot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vst2dncot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d.nc.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dncot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2dncot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vst2dncot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vst2dncot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vst2d.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, 
%s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vst2dncot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vst2dncot_vssl_imm(i8* %0) { +; CHECK-LABEL: vst2dncot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d.nc.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dncot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vst2dncot_vssml_imm(i8* %0) { +; CHECK-LABEL: vst2dncot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vst2d.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vst2dncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2d_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2d_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2d.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2d.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstu2d_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2d_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, 
%s0 +; CHECK-NEXT: vstu2d %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2d.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2d.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstu2d_vssl_imm(i8* %0) { +; CHECK-LABEL: vstu2d_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2d.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2d_vssml_imm(i8* %0) { +; CHECK-LABEL: vstu2d_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2d.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2dnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2dnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d.nc %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dnc.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2dnc.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstu2dnc_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2dnc_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; 
CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2dnc.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstu2dnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vstu2dnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d.nc %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dnc.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2dnc_vssml_imm(i8* %0) { +; CHECK-LABEL: vstu2dnc_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2dot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2dot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2dot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstu2dot_vssml(i8* 
%0, i64 %1) { +; CHECK-LABEL: vstu2dot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2dot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstu2dot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstu2dot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2dot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstu2dot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2dncot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2dncot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d.nc.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dncot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2dncot.vssl(<256 
x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstu2dncot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstu2dncot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstu2d.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstu2dncot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstu2dncot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstu2dncot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d.nc.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dncot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstu2dncot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstu2dncot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstu2d.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstu2dncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2d_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2d_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2d.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + 
ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2d.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstl2d_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2d_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2d.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2d.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstl2d_vssl_imm(i8* %0) { +; CHECK-LABEL: vstl2d_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2d.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2d_vssml_imm(i8* %0) { +; CHECK-LABEL: vstl2d_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2d.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2dnc_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2dnc_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d.nc %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void 
@llvm.ve.vl.vstl2dnc.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2dnc.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstl2dnc_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2dnc_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2dnc.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstl2dnc_vssl_imm(i8* %0) { +; CHECK-LABEL: vstl2dnc_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d.nc %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dnc.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2dnc_vssml_imm(i8* %0) { +; CHECK-LABEL: vstl2dnc_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2dot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2dot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast 
<256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2dot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstl2dot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2dot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2dot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstl2dot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstl2dot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2dot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstl2dot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2dncot_vssl(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2dncot_vssl: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; 
CHECK-NEXT: vstl2d.nc.ot %v0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dncot.vssl(<256 x double> %3, i64 %1, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2dncot.vssl(<256 x double>, i64, i8*, i32) + +; Function Attrs: nounwind +define void @vstl2dncot_vssml(i8* %0, i64 %1) { +; CHECK-LABEL: vstl2dncot_vssml: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: vld %v0, %s1, %s0 +; CHECK-NEXT: vstl2d.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) + ret void +} + +; Function Attrs: nounwind writeonly +declare void @llvm.ve.vl.vstl2dncot.vssml(<256 x double>, i64, i8*, <256 x i1>, i32) + +; Function Attrs: nounwind +define void @vstl2dncot_vssl_imm(i8* %0) { +; CHECK-LABEL: vstl2dncot_vssl_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d.nc.ot %v0, 8, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dncot.vssl(<256 x double> %2, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vstl2dncot_vssml_imm(i8* %0) { +; CHECK-LABEL: vstl2dncot_vssml_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: vstl2d.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + tail call void @llvm.ve.vl.vstl2dncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) + ret void +}