Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -305,6 +305,13 @@
 BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
 BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
 
+// P10 Move to VSR with Mask built-ins.
+BUILTIN(__builtin_altivec_mtvsrbm, "V16UcULLi", "")
+BUILTIN(__builtin_altivec_mtvsrhm, "V8UsULLi", "")
+BUILTIN(__builtin_altivec_mtvsrwm, "V4UiULLi", "")
+BUILTIN(__builtin_altivec_mtvsrdm, "V2ULLiULLi", "")
+BUILTIN(__builtin_altivec_mtvsrqm, "V1ULLLiULLi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16791,6 +16791,33 @@
   return __builtin_altivec_vextractqm(__a);
 }
 
+/* vec_gen[b|h|w|d|q]m */
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_genbm(unsigned long long __bm) {
+  return __builtin_altivec_mtvsrbm(__bm);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_genhm(unsigned long long __bm) {
+  return __builtin_altivec_mtvsrhm(__bm);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_genwm(unsigned long long __bm) {
+  return __builtin_altivec_mtvsrwm(__bm);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_gendm(unsigned long long __bm) {
+  return __builtin_altivec_mtvsrdm(__bm);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_genqm(unsigned long long __bm) {
+  return __builtin_altivec_mtvsrqm(__bm);
+}
+
 /* vec_pdep */
 
 static __inline__ vector unsigned long long __ATTRS_o_ai
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -12,6 +12,7 @@
 vector unsigned long long vulla, vullb, vullc;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 unsigned int uia;
+unsigned long long ulla;
 
 unsigned int test_vec_extractm_uc(void) {
   // CHECK: @llvm.ppc.altivec.vextractbm(<16 x i8>
@@ -43,6 +44,64 @@
   return vec_extractm(vui128a);
 }
 
+vector unsigned char test_vec_genbm(void) {
+  // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_genbm(ulla);
+}
+
+vector unsigned char test_vec_genbm_imm(void) {
+  // CHECK: store i64 1
+  // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_genbm(1);
+}
+
+vector unsigned char test_vec_genbm_imm2(void) {
+  // CHECK: store i64 255
+  // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_genbm(255);
+}
+
+vector unsigned char test_vec_genbm_imm3(void) {
+  // CHECK: store i64 256
+  // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_genbm(256);
+}
+
+vector unsigned char test_vec_genbm_imm4(void) {
+  // CHECK: store i64 266
+  // CHECK: @llvm.ppc.altivec.mtvsrbm(i64
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_genbm(266);
+}
+
+vector unsigned short test_vec_genhm(void) {
+  // CHECK: @llvm.ppc.altivec.mtvsrhm(i64
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_genhm(ulla);
+}
+
+vector unsigned int test_vec_genwm(void) {
+  // CHECK: @llvm.ppc.altivec.mtvsrwm(i64
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_genwm(ulla);
+}
+
+vector unsigned long long test_vec_gendm(void) {
+  // CHECK: @llvm.ppc.altivec.mtvsrdm(i64
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_gendm(ulla);
+}
+
+vector unsigned __int128 test_vec_genqm(void) {
+  // CHECK: @llvm.ppc.altivec.mtvsrqm(i64
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_genqm(ulla);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -435,6 +435,18 @@
   def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
               Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
 
+  // P10 Move to VSR with Mask Intrinsics.
+  def int_ppc_altivec_mtvsrbm : GCCBuiltin<"__builtin_altivec_mtvsrbm">,
+              Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_mtvsrhm : GCCBuiltin<"__builtin_altivec_mtvsrhm">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_mtvsrwm : GCCBuiltin<"__builtin_altivec_mtvsrwm">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_mtvsrdm : GCCBuiltin<"__builtin_altivec_mtvsrdm">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_mtvsrqm : GCCBuiltin<"__builtin_altivec_mtvsrqm">,
+              Intrinsic<[llvm_v1i128_ty], [llvm_i64_ty], [IntrNoMem]>;
+
   // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
   def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10320,12 +10320,31 @@
 
   SDLoc dl(Op);
 
-  if (IntrinsicID == Intrinsic::thread_pointer) {
+  // Lower intrinsics by switching on the IntrinsicID, as future intrinsics
+  // will also be lowered in this manner.
+  switch (IntrinsicID) {
+  case Intrinsic::thread_pointer:
     // Reads the thread pointer register, used for __builtin_thread_pointer.
     if (Subtarget.isPPC64())
       return DAG.getRegister(PPC::X13, MVT::i64);
     return DAG.getRegister(PPC::R2, MVT::i32);
+
+  case Intrinsic::ppc_altivec_mtvsrbm: {
+    // The llvm.ppc.altivec.mtvsrbm intrinsic can correspond to two different
+    // instructions: one that takes an immediate as an operand (mtvsrbmi)
+    // and one that does not (mtvsrbm).
+    // For mtvsrbmi, only the low 8 bits of the mask (values 0 to 255) are
+    // used; any higher bits of a constant operand are masked off here.
+    SDValue ImmOperand = Op.getOperand(1);
+    if (!isa<ConstantSDNode>(ImmOperand))
+      break;
+
+    unsigned Imm =
+        (cast<ConstantSDNode>(ImmOperand)->getZExtValue()) & 0xFF;
+    return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Op.getOperand(0),
+                       DAG.getConstant(Imm, dl, MVT::i64));
   }
+  } // End switch (IntrinsicID).
 
   // If this is a lowered altivec predicate compare, CompareOpc is set to the
   // opcode number of the comparison.
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -586,6 +586,30 @@
                                       "vextractqm $rD, $vB", IIC_VecGeneral,
                                       [(set i32:$rD,
                                            (int_ppc_altivec_vextractqm v1i128:$vB))]>;
+  def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB),
+                                   "mtvsrbm $vD, $rB", IIC_VecGeneral,
+                                   [(set v16i8:$vD,
+                                        (int_ppc_altivec_mtvsrbm i64:$rB))]>;
+  def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB),
+                                   "mtvsrhm $vD, $rB", IIC_VecGeneral,
+                                   [(set v8i16:$vD,
+                                        (int_ppc_altivec_mtvsrhm i64:$rB))]>;
+  def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB),
+                                   "mtvsrwm $vD, $rB", IIC_VecGeneral,
+                                   [(set v4i32:$vD,
+                                        (int_ppc_altivec_mtvsrwm i64:$rB))]>;
+  def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB),
+                                   "mtvsrdm $vD, $rB", IIC_VecGeneral,
+                                   [(set v2i64:$vD,
+                                        (int_ppc_altivec_mtvsrdm i64:$rB))]>;
+  def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB),
+                                   "mtvsrqm $vD, $rB", IIC_VecGeneral,
+                                   [(set v1i128:$vD,
+                                        (int_ppc_altivec_mtvsrqm i64:$rB))]>;
+  def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D),
+                        "mtvsrbmi $vD, $D", IIC_VecGeneral,
+                        [(set v16i8:$vD,
+                             (int_ppc_altivec_mtvsrbm imm:$D))]>;
   def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v2i64:$vD,
Index: llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -61,3 +61,99 @@
 declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>)
 declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>)
 declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>)
+
+define <16 x i8> @test_mtvsrbm(i64 %a) {
+; CHECK-LABEL: test_mtvsrbm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrbm v2, r3
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 %a)
+  ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi() {
+; CHECK-LABEL: test_mtvsrbmi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrbmi v2, 1
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 1)
+  ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi2() {
+; CHECK-LABEL: test_mtvsrbmi2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrbmi v2, 255
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 255)
+  ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi3() {
+; CHECK-LABEL: test_mtvsrbmi3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrbmi v2, 0
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 256)
+  ret <16 x i8> %mv
+}
+
+define <16 x i8> @test_mtvsrbmi4() {
+; CHECK-LABEL: test_mtvsrbmi4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrbmi v2, 10
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64 266)
+  ret <16 x i8> %mv
+}
+
+define <8 x i16> @test_mtvsrhm(i64 %a) {
+; CHECK-LABEL: test_mtvsrhm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrhm v2, r3
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <8 x i16> @llvm.ppc.altivec.mtvsrhm(i64 %a)
+  ret <8 x i16> %mv
+}
+
+define <4 x i32> @test_mtvsrwm(i64 %a) {
+; CHECK-LABEL: test_mtvsrwm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrwm v2, r3
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <4 x i32> @llvm.ppc.altivec.mtvsrwm(i64 %a)
+  ret <4 x i32> %mv
+}
+
+define <2 x i64> @test_mtvsrdm(i64 %a) {
+; CHECK-LABEL: test_mtvsrdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrdm v2, r3
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <2 x i64> @llvm.ppc.altivec.mtvsrdm(i64 %a)
+  ret <2 x i64> %mv
+}
+
+define <1 x i128> @test_mtvsrqm(i64 %a) {
+; CHECK-LABEL: test_mtvsrqm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mtvsrqm v2, r3
+; CHECK-NEXT:    blr
+entry:
+  %mv = tail call <1 x i128> @llvm.ppc.altivec.mtvsrqm(i64 %a)
+  ret <1 x i128> %mv
+}
+
+declare <16 x i8> @llvm.ppc.altivec.mtvsrbm(i64)
+declare <8 x i16> @llvm.ppc.altivec.mtvsrhm(i64)
+declare <4 x i32> @llvm.ppc.altivec.mtvsrwm(i64)
+declare <2 x i64> @llvm.ppc.altivec.mtvsrdm(i64)
+declare <1 x i128> @llvm.ppc.altivec.mtvsrqm(i64)
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -16,6 +16,24 @@
 # CHECK: vextractqm 1, 2
 0x10 0x2c 0x16 0x42
 
+# CHECK: mtvsrbm 1, 2
+0x10 0x30 0x16 0x42
+
+# CHECK: mtvsrhm 1, 2
+0x10 0x31 0x16 0x42
+
+# CHECK: mtvsrwm 1, 2
+0x10 0x32 0x16 0x42
+
+# CHECK: mtvsrdm 1, 2
+0x10 0x33 0x16 0x42
+
+# CHECK: mtvsrqm 1, 2
+0x10 0x34 0x16 0x42
+
+# CHECK: mtvsrbmi 1, 65535
+0x10 0x3f 0xff 0xd5
+
 # CHECK: vpdepd 1, 2, 0
 0x10 0x22 0x05 0xcd
 
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -18,6 +18,24 @@
 # CHECK-BE: vextractqm 1, 2               # encoding: [0x10,0x2c,0x16,0x42]
 # CHECK-LE: vextractqm 1, 2               # encoding: [0x42,0x16,0x2c,0x10]
             vextractqm 1, 2
+# CHECK-BE: mtvsrbm 1, 2                  # encoding: [0x10,0x30,0x16,0x42]
+# CHECK-LE: mtvsrbm 1, 2                  # encoding: [0x42,0x16,0x30,0x10]
+            mtvsrbm 1, 2
+# CHECK-BE: mtvsrhm 1, 2                  # encoding: [0x10,0x31,0x16,0x42]
+# CHECK-LE: mtvsrhm 1, 2                  # encoding: [0x42,0x16,0x31,0x10]
+            mtvsrhm 1, 2
+# CHECK-BE: mtvsrwm 1, 2                  # encoding: [0x10,0x32,0x16,0x42]
+# CHECK-LE: mtvsrwm 1, 2                  # encoding: [0x42,0x16,0x32,0x10]
+            mtvsrwm 1, 2
+# CHECK-BE: mtvsrdm 1, 2                  # encoding: [0x10,0x33,0x16,0x42]
+# CHECK-LE: mtvsrdm 1, 2                  # encoding: [0x42,0x16,0x33,0x10]
+            mtvsrdm 1, 2
+# CHECK-BE: mtvsrqm 1, 2                  # encoding: [0x10,0x34,0x16,0x42]
+# CHECK-LE: mtvsrqm 1, 2                  # encoding: [0x42,0x16,0x34,0x10]
+            mtvsrqm 1, 2
+# CHECK-BE: mtvsrbmi 1, 31                # encoding: [0x10,0x2f,0x00,0x15]
+# CHECK-LE: mtvsrbmi 1, 31                # encoding: [0x15,0x00,0x2f,0x10]
+            mtvsrbmi 1, 31
 # CHECK-BE: vpdepd 1, 2, 0                # encoding: [0x10,0x22,0x05,0xcd]
 # CHECK-LE: vpdepd 1, 2, 0                # encoding: [0xcd,0x05,0x22,0x10]
             vpdepd 1, 2, 0
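
Usage sketch (illustrative, not part of the patch): the snippet below shows how the vec_gen[b|h|w|d|q]m interfaces added to altivec.h above are intended to be called from C. Only vec_genbm/vec_gendm and their unsigned long long mask parameter come from the diff; the file name, helper function names, and the -mcpu=pwr10 compile line are assumptions for illustration.

// gen_mask_demo.c -- illustrative only; assumes a Power10-enabled clang, e.g.
//   clang -mcpu=pwr10 -c gen_mask_demo.c
#include <altivec.h>

// Build a byte mask: each of the low 16 bits of the mask selects one byte
// element, producing an all-ones byte for a set bit and zero for a clear bit.
vector unsigned char byte_mask_from_bits(unsigned long long bm) {
  return vec_genbm(bm);
}

// Same idea at doubleword granularity; only the low 2 bits of bm are used.
vector unsigned long long dword_mask_from_bits(unsigned long long bm) {
  return vec_gendm(bm);
}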