Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -806,6 +806,18 @@ else setOperationAction(ISD::MUL, MVT::v4i32, Custom); + if (Subtarget.isISA3_1()) { + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::UDIV, MVT::v2i64, Legal); + setOperationAction(ISD::SDIV, MVT::v2i64, Legal); + setOperationAction(ISD::UDIV, MVT::v4i32, Legal); + setOperationAction(ISD::SDIV, MVT::v4i32, Legal); + setOperationAction(ISD::UREM, MVT::v2i64, Legal); + setOperationAction(ISD::SREM, MVT::v2i64, Legal); + setOperationAction(ISD::UREM, MVT::v4i32, Legal); + setOperationAction(ISD::SREM, MVT::v4i32, Legal); + } + setOperationAction(ISD::MUL, MVT::v8i16, Legal); setOperationAction(ISD::MUL, MVT::v16i8, Custom); Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -568,6 +568,33 @@ "vclrrb $vD, $vA, $rB", IIC_VecGeneral, [(set v16i8:$vD, (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>; + def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmulld $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>; + def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>; + def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmoduw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>; + def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>; + def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vmodud $vD, $vA, $vB", 
IIC_VecGeneral, + [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>; + def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>; + def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivuw $vD, $vA, $vB", IIC_VecGeneral, + [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>; + def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivsd $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>; + def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), + "vdivud $vD, $vA, $vB", IIC_VecGeneral, + [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>; } //---------------------------- Anonymous Patterns ----------------------------// Index: llvm/test/CodeGen/PowerPC/p10-vector-divide.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/p10-vector-divide.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; This test case aims to test the vector divide instructions on Power10. +; It covers the low order versions of vector divide, which operate on +; signed and unsigned words and doublewords. 
+ +define <2 x i64> @test_vdivud(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vdivud: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdivud v2, v2, v3 +; CHECK-NEXT: blr +entry: + %div = udiv <2 x i64> %a, %b + ret <2 x i64> %div +} + +define <2 x i64> @test_vdivsd(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vdivsd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdivsd v2, v2, v3 +; CHECK-NEXT: blr +entry: + %div = sdiv <2 x i64> %a, %b + ret <2 x i64> %div +} + +define <4 x i32> @test_vdivuw(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vdivuw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdivuw v2, v2, v3 +; CHECK-NEXT: blr +entry: + %div = udiv <4 x i32> %a, %b + ret <4 x i32> %div +} + +define <4 x i32> @test_vdivsw(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vdivsw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdivsw v2, v2, v3 +; CHECK-NEXT: blr +entry: + %div = sdiv <4 x i32> %a, %b + ret <4 x i32> %div +} Index: llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; This test case aims to test the vector modulo instructions on Power10. +; The vector modulo instructions operate on signed and unsigned words +; and doublewords. 
+ +define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmodud: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmodud v2, v2, v3 +; CHECK-NEXT: blr +entry: + %rem = urem <2 x i64> %a, %b + ret <2 x i64> %rem +} + +define <2 x i64> @test_vmodsd(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmodsd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmodsd v2, v2, v3 +; CHECK-NEXT: blr +entry: + %rem = srem <2 x i64> %a, %b + ret <2 x i64> %rem +} + +define <4 x i32> @test_vmoduw(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmoduw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmoduw v2, v2, v3 +; CHECK-NEXT: blr +entry: + %rem = urem <4 x i32> %a, %b + ret <4 x i32> %rem +} + +define <4 x i32> @test_vmodsw(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmodsw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmodsw v2, v2, v3 +; CHECK-NEXT: blr +entry: + %rem = srem <4 x i32> %a, %b + ret <4 x i32> %rem +} + +define <2 x i64> @test_vmodud_with_div(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmodud_with_div: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmodud v4, v2, v3 +; CHECK-NEXT: vdivud v2, v2, v3 +; CHECK-NEXT: vaddudm v2, v4, v2 +; CHECK-NEXT: blr +entry: + %rem = urem <2 x i64> %a, %b + %div = udiv <2 x i64> %a, %b + %add = add <2 x i64> %rem, %div + ret <2 x i64> %add +} + +define <2 x i64> @test_vmodsd_with_div(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmodsd_with_div: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmodsd v4, v2, v3 +; CHECK-NEXT: vdivsd v2, v2, v3 +; CHECK-NEXT: vaddudm v2, v4, v2 +; CHECK-NEXT: blr +entry: + %rem = srem <2 x i64> %a, %b + %div = sdiv <2 x i64> %a, %b + %add = add <2 x i64> %rem, %div + ret <2 x i64> %add +} + +define <4 x i32> @test_vmoduw_with_div(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmoduw_with_div: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmoduw v4, v2, v3 +; CHECK-NEXT: vdivuw v2, v2, v3 +; CHECK-NEXT: vadduwm v2, v4, v2 +; CHECK-NEXT: blr +entry: + %rem = urem <4 x i32> 
%a, %b + %div = udiv <4 x i32> %a, %b + %add = add <4 x i32> %rem, %div + ret <4 x i32> %add +} + +define <4 x i32> @test_vmodsw_div(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmodsw_div: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmodsw v4, v2, v3 +; CHECK-NEXT: vdivsw v2, v2, v3 +; CHECK-NEXT: vadduwm v2, v4, v2 +; CHECK-NEXT: blr +entry: + %rem = srem <4 x i32> %a, %b + %div = sdiv <4 x i32> %a, %b + %add = add <4 x i32> %rem, %div + ret <4 x i32> %add +} Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; This test case aims to test the vector multiply instructions on Power10. 
+ +define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: test_vmulld: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulld v2, v3, v2 +; CHECK-NEXT: blr +entry: + %mul = mul <2 x i64> %b, %a + ret <2 x i64> %mul +} Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt =================================================================== --- llvm/test/MC/Disassembler/PowerPC/p10insts.txt +++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -42,3 +42,30 @@ # CHECK: vclrrb 1, 4, 3 0x10 0x24 0x19 0xcd + +# CHECK: vmulld 1, 2, 3 +0x10 0x22 0x19 0xc9 + +# CHECK: vmodsw 21, 11, 10 +0x12 0xab 0x57 0x8b + +# CHECK: vmoduw 21, 11, 10 +0x12 0xab 0x56 0x8b + +# CHECK: vmodsd 21, 11, 10 +0x12 0xab 0x57 0xcb + +# CHECK: vmodud 21, 11, 10 +0x12 0xab 0x56 0xcb + +# CHECK: vdivsw 21, 11, 10 +0x12 0xab 0x51 0x8b + +# CHECK: vdivuw 21, 11, 10 +0x12 0xab 0x50 0x8b + +# CHECK: vdivsd 21, 11, 10 +0x12 0xab 0x51 0xcb + +# CHECK: vdivud 21, 11, 10 +0x12 0xab 0x50 0xcb Index: llvm/test/MC/PowerPC/p10.s =================================================================== --- llvm/test/MC/PowerPC/p10.s +++ llvm/test/MC/PowerPC/p10.s @@ -45,3 +45,30 @@ # CHECK-BE: vclrrb 1, 4, 3 # encoding: [0x10,0x24,0x19,0xcd] # CHECK-LE: vclrrb 1, 4, 3 # encoding: [0xcd,0x19,0x24,0x10] vclrrb 1, 4, 3 +# CHECK-BE: vmulld 1, 2, 3 # encoding: [0x10,0x22,0x19,0xc9] +# CHECK-LE: vmulld 1, 2, 3 # encoding: [0xc9,0x19,0x22,0x10] + vmulld 1, 2, 3 +# CHECK-BE: vmodsw 21, 11, 10 # encoding: [0x12,0xab,0x57,0x8b] +# CHECK-LE: vmodsw 21, 11, 10 # encoding: [0x8b,0x57,0xab,0x12] + vmodsw 21, 11, 10 +# CHECK-BE: vmoduw 21, 11, 10 # encoding: [0x12,0xab,0x56,0x8b] +# CHECK-LE: vmoduw 21, 11, 10 # encoding: [0x8b,0x56,0xab,0x12] + vmoduw 21, 11, 10 +# CHECK-BE: vmodsd 21, 11, 10 # encoding: [0x12,0xab,0x57,0xcb] +# CHECK-LE: vmodsd 21, 11, 10 # encoding: [0xcb,0x57,0xab,0x12] + vmodsd 21, 11, 10 +# CHECK-BE: vmodud 21, 11, 10 # encoding: [0x12,0xab,0x56,0xcb] +# CHECK-LE: vmodud 21, 11, 10 # encoding: 
[0xcb,0x56,0xab,0x12] + vmodud 21, 11, 10 +# CHECK-BE: vdivsw 21, 11, 10 # encoding: [0x12,0xab,0x51,0x8b] +# CHECK-LE: vdivsw 21, 11, 10 # encoding: [0x8b,0x51,0xab,0x12] + vdivsw 21, 11, 10 +# CHECK-BE: vdivuw 21, 11, 10 # encoding: [0x12,0xab,0x50,0x8b] +# CHECK-LE: vdivuw 21, 11, 10 # encoding: [0x8b,0x50,0xab,0x12] + vdivuw 21, 11, 10 +# CHECK-BE: vdivsd 21, 11, 10 # encoding: [0x12,0xab,0x51,0xcb] +# CHECK-LE: vdivsd 21, 11, 10 # encoding: [0xcb,0x51,0xab,0x12] + vdivsd 21, 11, 10 +# CHECK-BE: vdivud 21, 11, 10 # encoding: [0x12,0xab,0x50,0xcb] +# CHECK-LE: vdivud 21, 11, 10 # encoding: [0xcb,0x50,0xab,0x12] + vdivud 21, 11, 10