diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td --- a/llvm/include/llvm/IR/IntrinsicsVE.td +++ b/llvm/include/llvm/IR/IntrinsicsVE.td @@ -2,9 +2,6 @@ // VEL Intrinsic instructions. let TargetPrefix = "ve" in { - def int_ve_vl_svob : GCCBuiltin<"__builtin_ve_vl_svob">, - Intrinsic<[], [], [IntrHasSideEffects]>; - def int_ve_vl_pack_f32p : GCCBuiltin<"__builtin_ve_vl_pack_f32p">, Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrReadMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td --- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td +++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td @@ -407,6 +407,42 @@ let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldz_vvl : GCCBuiltin<"__builtin_ve_vl_vldz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldz_vvvl : GCCBuiltin<"__builtin_ve_vl_vldz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vldz_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvldzlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvvl : 
GCCBuiltin<"__builtin_ve_vl_pvldzlo_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldzlo_vvmvl : GCCBuiltin<"__builtin_ve_vl_pvldzlo_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvl : GCCBuiltin<"__builtin_ve_vl_pvldzup_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvvl : GCCBuiltin<"__builtin_ve_vl_pvldzup_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldzup_vvmvl : GCCBuiltin<"__builtin_ve_vl_pvldzup_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvl : GCCBuiltin<"__builtin_ve_vl_pvldz_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvvl : GCCBuiltin<"__builtin_ve_vl_pvldz_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvldz_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvldz_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvl : GCCBuiltin<"__builtin_ve_vl_vpcnt_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvvl : GCCBuiltin<"__builtin_ve_vl_vpcnt_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vpcnt_vvmvl : GCCBuiltin<"__builtin_ve_vl_vpcnt_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvpcntlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvpcntlo_vvvl : GCCBuiltin<"__builtin_ve_vl_pvpcntlo_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcntlo_vvmvl : GCCBuiltin<"__builtin_ve_vl_pvpcntlo_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvl : GCCBuiltin<"__builtin_ve_vl_pvpcntup_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvvl : GCCBuiltin<"__builtin_ve_vl_pvpcntup_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcntup_vvmvl : GCCBuiltin<"__builtin_ve_vl_pvpcntup_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvl : GCCBuiltin<"__builtin_ve_vl_pvpcnt_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvvl : GCCBuiltin<"__builtin_ve_vl_pvpcnt_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvpcnt_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvpcnt_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvl : GCCBuiltin<"__builtin_ve_vl_vbrv_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvvl : GCCBuiltin<"__builtin_ve_vl_vbrv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrv_vvmvl : GCCBuiltin<"__builtin_ve_vl_vbrv_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvbrvlo_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix 
= "ve" in def int_ve_vl_pvbrvlo_vvvl : GCCBuiltin<"__builtin_ve_vl_pvbrvlo_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrvlo_vvmvl : GCCBuiltin<"__builtin_ve_vl_pvbrvlo_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvl : GCCBuiltin<"__builtin_ve_vl_pvbrvup_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvvl : GCCBuiltin<"__builtin_ve_vl_pvbrvup_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrvup_vvmvl : GCCBuiltin<"__builtin_ve_vl_pvbrvup_vvmvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvl : GCCBuiltin<"__builtin_ve_vl_pvbrv_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvvl : GCCBuiltin<"__builtin_ve_vl_pvbrv_vvvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrv_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvbrv_vvMvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_vseq_vl : GCCBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : GCCBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType], [LLVMType], [IntrNoMem]>; @@ -1211,3 +1247,11 @@ let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : GCCBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : 
GCCBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : GCCBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lcr_sss : GCCBuiltin<"__builtin_ve_vl_lcr_sss">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_scr_sss : GCCBuiltin<"__builtin_ve_vl_scr_sss">, Intrinsic<[], [LLVMType, LLVMType, LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_tscr_ssss : GCCBuiltin<"__builtin_ve_vl_tscr_ssss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fidcr_sss : GCCBuiltin<"__builtin_ve_vl_fidcr_sss">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fencei : GCCBuiltin<"__builtin_ve_vl_fencei">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fencem_s : GCCBuiltin<"__builtin_ve_vl_fencem_s">, Intrinsic<[], [LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_fencec_s : GCCBuiltin<"__builtin_ve_vl_fencec_s">, Intrinsic<[], [LLVMType], [IntrNoMem, IntrHasSideEffects]>; +let TargetPrefix = "ve" in def int_ve_vl_svob : GCCBuiltin<"__builtin_ve_vl_svob">, Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -875,14 +875,14 @@ // e.g. LCR let hasSideEffects = 1 in multiclass LOADCRmopc, RegisterClass RC> { - def rr : RR; - let cy = 0 in def ri : RR; - let cz = 0 in def zr : RR; let cy = 0, cz = 0 in - def zi : RR; } @@ -890,17 +890,31 @@ // e.g. 
SCR let hasSideEffects = 1 in multiclass STORECRmopc, RegisterClass RC> { - def rr : RR; - let cy = 0 in def ri : RR; - let cz = 0 in def zr : RR; + let cy = 0 in def irr : RR; + let cz = 0 in def rzr : RR; let cy = 0, cz = 0 in - def zi : RR; + def izr : RR; +} + +let hasSideEffects = 1, Constraints = "$sx = $sx_in", DisableEncoding = "$sx_in" in +multiclass TSCRmopc, RegisterClass RC> { + def rrr : RR; + let cy = 0 in def irr : RR; + let cz = 0 in def rzr : RR; + let cy = 0, cz = 0 in + def izr : RR; } + // Multiclass for communication register instructions. // e.g. FIDCR let cz = 0, hasSideEffects = 1 in @@ -1528,7 +1542,7 @@ defm SCR : STORECRm<"scr", 0x50, I64>; // Section 8.19.11 - TSCR (Test & Set Communication Register) -defm TSCR : LOADCRm<"tscr", 0x41, I64>; +defm TSCR : TSCRm<"tscr", 0x41, I64>; // Section 8.19.12 - FIDCR (Fetch & Increment/Decrement CR) defm FIDCR : FIDCRm<"fidcr", 0x51, I64>; diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td --- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td @@ -601,6 +601,42 @@ def : Pat<(int_ve_vl_pveqv_vsvvl i64:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), (PVEQVrvl_v i64:$sy, v256f64:$vz, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_pveqv_vvvMvl v256f64:$vy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVvvml_v v256f64:$vy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_pveqv_vsvMvl i64:$sy, v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVEQVrvml_v i64:$sy, v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldz_vvl v256f64:$vz, i32:$vl), (VLDZvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vldz_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VLDZvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vldz_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VLDZvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzlo_vvl 
v256f64:$vz, i32:$vl), (PVLDZLOvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvldzlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVLDZLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzup_vvl v256f64:$vz, i32:$vl), (PVLDZUPvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvldzup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldzup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVLDZUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldz_vvl v256f64:$vz, i32:$vl), (PVLDZvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvldz_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVLDZvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvldz_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVLDZvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vpcnt_vvl v256f64:$vz, i32:$vl), (VPCNTvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vpcnt_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VPCNTvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vpcnt_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VPCNTvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntlo_vvl v256f64:$vz, i32:$vl), (PVPCNTLOvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvpcntlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcntup_vvl v256f64:$vz, i32:$vl), (PVPCNTUPvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvpcntup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : 
Pat<(int_ve_vl_pvpcntup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcnt_vvl v256f64:$vz, i32:$vl), (PVPCNTvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvpcnt_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVPCNTvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvpcnt_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVPCNTvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrv_vvl v256f64:$vz, i32:$vl), (VBRVvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vbrv_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (VBRVvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vbrv_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (VBRVvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvlo_vvl v256f64:$vz, i32:$vl), (PVBRVLOvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrvlo_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVLOvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvlo_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVBRVLOvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvup_vvl v256f64:$vz, i32:$vl), (PVBRVUPvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrvup_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVUPvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrvup_vvmvl v256f64:$vz, v256i1:$vm, v256f64:$pt, i32:$vl), (PVBRVUPvml_v v256f64:$vz, v256i1:$vm, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrv_vvl v256f64:$vz, i32:$vl), (PVBRVvl v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_pvbrv_vvvl v256f64:$vz, v256f64:$pt, i32:$vl), (PVBRVvl_v v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_pvbrv_vvMvl v256f64:$vz, v512i1:$vm, v256f64:$pt, i32:$vl), (PVBRVvml_v v256f64:$vz, v512i1:$vm, i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_vseq_vl i32:$vl), (VSEQl i32:$vl)>; def : Pat<(int_ve_vl_vseq_vvl 
v256f64:$pt, i32:$vl), (VSEQl_v i32:$vl, v256f64:$pt)>; def : Pat<(int_ve_vl_pvseqlo_vl i32:$vl), (PVSEQLOl i32:$vl)>; @@ -1602,3 +1638,21 @@ def : Pat<(int_ve_vl_pcvm_sml v256i1:$vmy, i32:$vl), (PCVMml v256i1:$vmy, i32:$vl)>; def : Pat<(int_ve_vl_lzvm_sml v256i1:$vmy, i32:$vl), (LZVMml v256i1:$vmy, i32:$vl)>; def : Pat<(int_ve_vl_tovm_sml v256i1:$vmy, i32:$vl), (TOVMml v256i1:$vmy, i32:$vl)>; +def : Pat<(int_ve_vl_lcr_sss i64:$sy, i64:$sz), (LCRrr i64:$sy, i64:$sz)>; +def : Pat<(int_ve_vl_lcr_sss i64:$sy, zero:$Z), (LCRrz i64:$sy, (LO7 $Z))>; +def : Pat<(int_ve_vl_lcr_sss uimm7:$N, i64:$sz), (LCRir (ULO7 $N), i64:$sz)>; +def : Pat<(int_ve_vl_lcr_sss uimm7:$N, zero:$Z), (LCRiz (ULO7 $N), (LO7 $Z))>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, i64:$sy, i64:$sz), (SCRrrr i64:$sy, i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, i64:$sy, zero:$Z), (SCRrzr i64:$sy, (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, uimm7:$N, i64:$sz), (SCRirr (ULO7 $N), i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_scr_sss i64:$sx, uimm7:$N, zero:$Z), (SCRizr (ULO7 $N), (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, i64:$sy, i64:$sz), (TSCRrrr i64:$sy, i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, i64:$sy, zero:$Z), (TSCRrzr i64:$sy, (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, uimm7:$N, i64:$sz), (TSCRirr (ULO7 $N), i64:$sz, i64:$sx)>; +def : Pat<(int_ve_vl_tscr_ssss i64:$sx, uimm7:$N, zero:$Z), (TSCRizr (ULO7 $N), (LO7 $Z), i64:$sx)>; +def : Pat<(int_ve_vl_fidcr_sss i64:$sy, uimm3:$I), (FIDCRri i64:$sy, (LO7 $I))>; +def : Pat<(int_ve_vl_fidcr_sss uimm7:$N, uimm3:$I), (FIDCRii (ULO7 $N), (LO7 $I))>; +def : Pat<(int_ve_vl_fencei ), (FENCEI )>; +def : Pat<(int_ve_vl_fencem_s uimm2:$I), (FENCEM (LO7 $I))>; +def : Pat<(int_ve_vl_fencec_s uimm3:$I), (FENCEC (LO7 $I))>; +def : Pat<(int_ve_vl_svob ), (SVOB )>; diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td --- 
a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td @@ -2,9 +2,6 @@ /// Intrinsic patterns written by hand. -// SVOB pattern. -def : Pat<(int_ve_vl_svob), (SVOB)>; - // Pack patterns. def : Pat<(i64 (int_ve_vl_pack_f32p ADDRrii:$addr0, ADDRrii:$addr1)), (ORrr (f2l (LDUrii MEMrii:$addr0)), diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/fence.ll b/llvm/test/CodeGen/VE/VELIntrinsics/fence.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/fence.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +; Function Attrs: nounwind mustprogress +define void @_Z6fenceiv() { +; CHECK: fencei + tail call void @llvm.ve.vl.fencei() + ret void +} + +; Function Attrs: nounwind +declare void @llvm.ve.vl.fencei() + +; Function Attrs: nounwind mustprogress +define void @_Z7fencem3v() { +; CHECK: fencem 3 + tail call void @llvm.ve.vl.fencem.s(i32 3) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.ve.vl.fencem.s(i32) + +; Function Attrs: nounwind mustprogress +define void @_Z7fencec7v() { +; CHECK: fencec 7 + tail call void @llvm.ve.vl.fencec.s(i32 7) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.ve.vl.fencec.s(i32) diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lcr.ll b/llvm/test/CodeGen/VE/VELIntrinsics/lcr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/lcr.ll @@ -0,0 +1,130 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test intrinsics for communication register +;;; +;;; Note: +;;; We test LCR, SCR, TSCR, and FIDCR instructions. 
+ +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +define i64 @lcr_sss(i64 noundef %0, i64 noundef %1) { +; CHECK-LABEL: lcr_sss: +; CHECK: # %bb.0: +; CHECK-NEXT: lcr %s0, %s0, %s1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call i64 @llvm.ve.vl.lcr.sss(i64 %0, i64 %1) + ret i64 %3 +} + +; Function Attrs: nofree nosync nounwind readnone +declare i64 @llvm.ve.vl.lcr.sss(i64, i64) + +; Function Attrs: nounwind +define void @scr_sss(i64 noundef %0, i64 noundef %1, i64 noundef %2) { +; CHECK-LABEL: scr_sss: +; CHECK: # %bb.0: +; CHECK-NEXT: scr %s0, %s1, %s2 +; CHECK-NEXT: b.l.t (, %s10) + tail call void @llvm.ve.vl.scr.sss(i64 %0, i64 %1, i64 %2) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.ve.vl.scr.sss(i64, i64, i64) + +; Function Attrs: nounwind +define i64 @tscr_ssss(i64 noundef %0, i64 noundef %1, i64 noundef %2) { +; CHECK-LABEL: tscr_ssss: +; CHECK: # %bb.0: +; CHECK-NEXT: tscr %s0, %s1, %s2 +; CHECK-NEXT: b.l.t (, %s10) + %4 = tail call i64 @llvm.ve.vl.tscr.ssss(i64 %0, i64 %1, i64 %2) + ret i64 %4 +} + +; Function Attrs: nounwind +declare i64 @llvm.ve.vl.tscr.ssss(i64, i64, i64) + +; Function Attrs: nounwind +define i64 @fidcr_ss0(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss0: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 0) + ret i64 %2 +} + +; Function Attrs: nounwind +declare i64 @llvm.ve.vl.fidcr.sss(i64, i32) + +; Function Attrs: nounwind +define i64 @fidcr_ss1(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss1: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 1) + ret i64 %2 +} + +; Function Attrs: nounwind +define i64 @fidcr_ss2(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss2: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 2 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 2) + ret i64 %2 +} + +; Function 
Attrs: nounwind +define i64 @fidcr_ss3(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss3: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 3 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 3) + ret i64 %2 +} + +; Function Attrs: nounwind +define i64 @fidcr_ss4(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss4: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 4 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 4) + ret i64 %2 +} + +; Function Attrs: nounwind +define i64 @fidcr_ss5(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss5: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 5 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 5) + ret i64 %2 +} + +; Function Attrs: nounwind +define i64 @fidcr_ss6(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss6: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 6 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 6) + ret i64 %2 +} + +; Function Attrs: nounwind +define i64 @fidcr_ss7(i64 noundef %0) { +; CHECK-LABEL: fidcr_ss7: +; CHECK: # %bb.0: +; CHECK-NEXT: fidcr %s0, %s0, 7 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call i64 @llvm.ve.vl.fidcr.sss(i64 %0, i32 7) + ret i64 %2 +} + +!2 = !{!"clang version 15.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git e0c5640dba6e9ba1cd29ed8d59b85c6378e48ac7)"} diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vbrv.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vbrv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vbrv.ll @@ -0,0 +1,210 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test vector bit reverse intrinsic instructions +;;; +;;; Note: +;;; We test VBRV*vl, VBRV*vl_v, VBRV*vml_v, PVBRV*vl, PVBRV*vl_v, PVBRV*vml_v instructions. 
+ +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vbrv_vvl(<256 x double> %0) { +; CHECK-LABEL: vbrv_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vbrv %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vbrv.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vbrv.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vbrv_vvvl(<256 x double> %0, <256 x double> %1) { +; CHECK-LABEL: vbrv_vvvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vbrv %v1, %v0 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.vbrv.vvvl(<256 x double> %0, <256 x double> %1, i32 128) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vbrv.vvvl(<256 x double>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vbrv_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) { +; CHECK-LABEL: vbrv_vvmvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vbrv %v1, %v0, %vm1 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %4 = tail call fast <256 x double> @llvm.ve.vl.vbrv.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128) + ret <256 x double> %4 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vbrv.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrvlo_vvl(<256 x double> %0) { +; CHECK-LABEL: pvbrvlo_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv.lo %v0, %v0 +; 
CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.pvbrvlo.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrvlo.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrvlo_vvvl(<256 x double> %0, <256 x double> %1) { +; CHECK-LABEL: pvbrvlo_vvvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv.lo %v1, %v0 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.pvbrvlo.vvvl(<256 x double> %0, <256 x double> %1, i32 128) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrvlo.vvvl(<256 x double>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrvlo_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) { +; CHECK-LABEL: pvbrvlo_vvmvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv.lo %v1, %v0, %vm1 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %4 = tail call fast <256 x double> @llvm.ve.vl.pvbrvlo.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128) + ret <256 x double> %4 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrvlo.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrvup_vvl(<256 x double> %0) { +; CHECK-LABEL: pvbrvup_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv.up %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.pvbrvup.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare 
<256 x double> @llvm.ve.vl.pvbrvup.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrvup_vvvl(<256 x double> %0, <256 x double> %1) { +; CHECK-LABEL: pvbrvup_vvvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv.up %v1, %v0 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.pvbrvup.vvvl(<256 x double> %0, <256 x double> %1, i32 128) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrvup.vvvl(<256 x double>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrvup_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) { +; CHECK-LABEL: pvbrvup_vvmvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv.up %v1, %v0, %vm1 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %4 = tail call fast <256 x double> @llvm.ve.vl.pvbrvup.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128) + ret <256 x double> %4 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrvup.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrv_vvl(<256 x double> %0) { +; CHECK-LABEL: pvbrv_vvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 256 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv %v0, %v0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.pvbrv.vvl(<256 x double> %0, i32 256) + ret <256 x double> %2 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrv.vvl(<256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrv_vvvl(<256 x double> %0, <256 x double> %1) { +; CHECK-LABEL: 
pvbrv_vvvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv %v1, %v0 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call fast <256 x double> @llvm.ve.vl.pvbrv.vvvl(<256 x double> %0, <256 x double> %1, i32 128) + ret <256 x double> %3 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrv.vvvl(<256 x double>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @pvbrv_vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2) { +; CHECK-LABEL: pvbrv_vvMvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvbrv %v1, %v0, %vm2 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %4 = tail call fast <256 x double> @llvm.ve.vl.pvbrv.vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2, i32 128) + ret <256 x double> %4 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.pvbrv.vvMvl(<256 x double>, <512 x i1>, <256 x double>, i32) diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vldz.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vldz.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vldz.ll @@ -0,0 +1,210 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test vector leading zero count intrinsic instructions +;;; +;;; Note: +;;; We test VLDZ*vl, VLDZ*vl_v, VLDZ*vml_v, PVLDZ*vl, PVLDZ*vl_v, PVLDZ*vml_v instructions. 
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single vldz %v0, %v0.
+define fastcc <256 x double> @vldz_vvl(<256 x double> %0) {
+; CHECK-LABEL: vldz_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vldz %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vldz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vldz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is vldz %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @vldz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vldz_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vldz %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vldz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vldz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <256 x i1> mask, vector, VL 128; the mask is expected in %vm1 on the vldz, followed by the same VL-256 vor.
+define fastcc <256 x double> @vldz_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vldz_vvmvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vldz %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vldz.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vldz.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single pvldz.lo %v0, %v0.
+define fastcc <256 x double> @pvldzlo_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvldzlo_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz.lo %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvldzlo.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldzlo.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is pvldz.lo %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @pvldzlo_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvldzlo_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz.lo %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvldzlo.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldzlo.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <256 x i1> mask, vector, VL 128; the mask is expected in %vm1 on the pvldz.lo, followed by the same VL-256 vor.
+define fastcc <256 x double> @pvldzlo_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvldzlo_vvmvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz.lo %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvldzlo.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldzlo.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single pvldz.up %v0, %v0.
+define fastcc <256 x double> @pvldzup_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvldzup_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz.up %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvldzup.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldzup.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is pvldz.up %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @pvldzup_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvldzup_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz.up %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvldzup.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldzup.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <256 x i1> mask, vector, VL 128; the mask is expected in %vm1 on the pvldz.up, followed by the same VL-256 vor.
+define fastcc <256 x double> @pvldzup_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvldzup_vvmvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz.up %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvldzup.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldzup.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single pvldz %v0, %v0.
+define fastcc <256 x double> @pvldz_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvldz_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvldz.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldz.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is pvldz %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @pvldz_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvldz_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvldz.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldz.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <512 x i1> mask, vector, VL 128; the mask is expected in %vm2 on the pvldz, followed by the same VL-256 vor.
+define fastcc <256 x double> @pvldz_vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvldz_vvMvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvldz %v1, %v0, %vm2
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvldz.vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvldz.vvMvl(<256 x double>, <512 x i1>, <256 x double>, i32)
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vpcnt.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vpcnt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/vpcnt.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+;;; Test vector population count intrinsic instructions
+;;;
+;;; Note:
+;;; We test VPCNT*vl, VPCNT*vl_v, VPCNT*vml_v, PVPCNT*vl, PVPCNT*vl_v, PVPCNT*vml_v instructions.
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single vpcnt %v0, %v0.
+define fastcc <256 x double> @vpcnt_vvl(<256 x double> %0) {
+; CHECK-LABEL: vpcnt_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vpcnt %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.vpcnt.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vpcnt.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is vpcnt %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @vpcnt_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: vpcnt_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vpcnt %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.vpcnt.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vpcnt.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <256 x i1> mask, vector, VL 128; the mask is expected in %vm1 on the vpcnt, followed by the same VL-256 vor.
+define fastcc <256 x double> @vpcnt_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: vpcnt_vvmvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: vpcnt %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.vpcnt.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.vpcnt.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single pvpcnt.lo %v0, %v0.
+define fastcc <256 x double> @pvpcntlo_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvpcntlo_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt.lo %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvpcntlo.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcntlo.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is pvpcnt.lo %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @pvpcntlo_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvpcntlo_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt.lo %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvpcntlo.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcntlo.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <256 x i1> mask, vector, VL 128; the mask is expected in %vm1 on the pvpcnt.lo, followed by the same VL-256 vor.
+define fastcc <256 x double> @pvpcntlo_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvpcntlo_vvmvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt.lo %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvpcntlo.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcntlo.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single pvpcnt.up %v0, %v0.
+define fastcc <256 x double> @pvpcntup_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvpcntup_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt.up %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvpcntup.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcntup.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is pvpcnt.up %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @pvpcntup_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvpcntup_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt.up %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvpcntup.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcntup.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <256 x i1> mask, vector, VL 128; the mask is expected in %vm1 on the pvpcnt.up, followed by the same VL-256 vor.
+define fastcc <256 x double> @pvpcntup_vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvpcntup_vvmvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt.up %v1, %v0, %vm1
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvpcntup.vvmvl(<256 x double> %0, <256 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcntup.vvmvl(<256 x double>, <256 x i1>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- one vector operand, VL 256; expected asm is a single pvpcnt %v0, %v0.
+define fastcc <256 x double> @pvpcnt_vvl(<256 x double> %0) {
+; CHECK-LABEL: pvpcnt_vvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 256
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt %v0, %v0
+; CHECK-NEXT: b.l.t (, %s10)
+  %2 = tail call fast <256 x double> @llvm.ve.vl.pvpcnt.vvl(<256 x double> %0, i32 256)
+  ret <256 x double> %2
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcnt.vvl(<256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- two vector operands, VL 128; expected asm is pvpcnt %v1, %v0, then vor %v0, (0)1, %v1 under VL 256.
+define fastcc <256 x double> @pvpcnt_vvvl(<256 x double> %0, <256 x double> %1) {
+; CHECK-LABEL: pvpcnt_vvvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt %v1, %v0
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %3 = tail call fast <256 x double> @llvm.ve.vl.pvpcnt.vvvl(<256 x double> %0, <256 x double> %1, i32 128)
+  ret <256 x double> %3
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcnt.vvvl(<256 x double>, <256 x double>, i32)
+
+; Function Attrs: nounwind readnone -- vector, <512 x i1> mask, vector, VL 128; the mask is expected in %vm2 on the pvpcnt, followed by the same VL-256 vor.
+define fastcc <256 x double> @pvpcnt_vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2) {
+; CHECK-LABEL: pvpcnt_vvMvl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lea %s0, 128
+; CHECK-NEXT: lvl %s0
+; CHECK-NEXT: pvpcnt %v1, %v0, %vm2
+; CHECK-NEXT: lea %s16, 256
+; CHECK-NEXT: lvl %s16
+; CHECK-NEXT: vor %v0, (0)1, %v1
+; CHECK-NEXT: b.l.t (, %s10)
+  %4 = tail call fast <256 x double> @llvm.ve.vl.pvpcnt.vvMvl(<256 x double> %0, <512 x i1> %1, <256 x double> %2, i32 128)
+  ret <256 x double> %4
+}
+
+; Function Attrs: nounwind readnone
+declare <256 x double> @llvm.ve.vl.pvpcnt.vvMvl(<256 x double>, <512 x i1>, <256 x double>, i32)