diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td --- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.td +++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.td @@ -30,6 +30,24 @@ def : Pat<(v512i1 (int_ve_vl_insert_vm512l v512i1:$vmx, v256i1:$vmy)), (INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_odd)>; +// VMRG patterns. +def : Pat<(int_ve_vl_vmrgw_vsvMl i32:$sy, v256f64:$vz, v512i1:$vm, i32:$vl), + (VMRGWrvml (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl)>; +def : Pat<(int_ve_vl_vmrgw_vsvMvl i32:$sy, v256f64:$vz, v512i1:$vm, + v256f64:$pt, i32:$vl), + (VMRGWrvml_v (i2l i32:$sy), v256f64:$vz, v512i1:$vm, i32:$vl, + v256f64:$pt)>; + +// VMV patterns. +def : Pat<(int_ve_vl_vmv_vsvl i32:$sy, v256f64:$vz, i32:$vl), + (VMVrvl (i2l i32:$sy), v256f64:$vz, i32:$vl)>; +def : Pat<(int_ve_vl_vmv_vsvvl i32:$sy, v256f64:$vz, v256f64:$pt, i32:$vl), + (VMVrvl_v (i2l i32:$sy), v256f64:$vz, i32:$vl, v256f64:$pt)>; +def : Pat<(int_ve_vl_vmv_vsvmvl i32:$sy, v256f64:$vz, v256i1:$vm, v256f64:$pt, + i32:$vl), + (VMVrvml_v (i2l i32:$sy), v256f64:$vz, v256i1:$vm, i32:$vl, + v256f64:$pt)>; + // LSV patterns. def : Pat<(int_ve_vl_lsv_vvss v256f64:$pt, i32:$sy, i64:$sz), (LSVrr_v (i2l i32:$sy), i64:$sz, v256f64:$pt)>; diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll --- a/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vmrg.ll @@ -131,3 +131,38 @@ ; Function Attrs: nounwind readnone declare <256 x double> @llvm.ve.vl.vmrgw.vvvMvl(<256 x double>, <256 x double>, <512 x i1>, <256 x double>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vmrgw_vsvMl(i32 signext %0, <256 x double> %1, <512 x i1> %2) { +; CHECK-LABEL: vmrgw_vsvMl: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vmrg.w %v0, %s0, %v0, %vm2 +; CHECK-NEXT: b.l.t (, %s10) + %4 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vsvMl(i32 %0, <256 x double> %1, <512 x i1> %2, i32 256) + ret <256 x double> %4 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vmrgw.vsvMl(i32, <256 x double>, <512 x i1>, i32) + +; Function Attrs: nounwind readnone +define fastcc <256 x double> @vmrgw_vsvMvl(i32 signext %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3) { +; CHECK-LABEL: vmrgw_vsvMvl: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea %s1, 128 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vmrg.w %v1, %s0, %v0, %vm2 +; CHECK-NEXT: lea %s16, 256 +; CHECK-NEXT: lvl %s16 +; CHECK-NEXT: vor %v0, (0)1, %v1 +; CHECK-NEXT: b.l.t (, %s10) + %5 = tail call fast <256 x double> @llvm.ve.vl.vmrgw.vsvMvl(i32 %0, <256 x double> %1, <512 x i1> %2, <256 x double> %3, i32 128) + ret <256 x double> %5 +} + +; Function Attrs: nounwind readnone +declare <256 x double> @llvm.ve.vl.vmrgw.vsvMvl(i32, <256 x double>, <512 x i1>, <256 x double>, i32) diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vmv.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vmv.ll --- a/llvm/test/CodeGen/VE/VELIntrinsics/vmv.ll +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vmv.ll @@ -6,18 +6,19 @@ ;;; We test VMVivl and VMVivl_v, and VMVivml_v instructions. ; Function Attrs: nounwind -define void @vmv_vsvl(i8* %0, i64 %1, i32 signext %2) { +define void @vmv_vsvl(i8* %0, i32 signext %1) { ; CHECK-LABEL: vmv_vsvl: ; CHECK: # %bb.0: -; CHECK-NEXT: lea %s1, 256 -; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vmv %v0, 31, %v0 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: vmv %v0, %s1, %v0 ; CHECK-NEXT: vst %v0, 8, %s0 ; CHECK-NEXT: b.l.t (, %s10) - %4 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) - %5 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvl(i32 31, <256 x double> %4, i32 256) - tail call void @llvm.ve.vl.vst.vssl(<256 x double> %5, i64 8, i8* %0, i32 256) + %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + %4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvl(i32 %1, <256 x double> %3, i32 256) + tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %0, i32 256) ret void } @@ -31,20 +32,37 @@ declare void @llvm.ve.vl.vst.vssl(<256 x double>, i64, i8*, i32) ; Function Attrs: nounwind -define void @vmv_vsvvl(i8* %0, i32 signext %1) { -; CHECK-LABEL: vmv_vsvvl: +define void @vmv_vsvl_imm(i8* %0) { +; CHECK-LABEL: vmv_vsvl_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: lea %s2, 128 -; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vmv %v0, 31, %v0 -; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + %3 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvl(i32 31, <256 x double> %2, i32 256) + tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vmv_vsvvl(i8* %0, i32 signext %1) { +; CHECK-LABEL: vmv_vsvvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea %s3, 128 +; CHECK-NEXT: lvl %s3 +; CHECK-NEXT: vmv %v0, %s1, %v0 +; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vst %v0, 8, %s0 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) - %4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvvl(i32 31, <256 x double> %3, <256 x double> %3, i32 128) + %4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvvl(i32 %1, <256 x double> %3, <256 x double> %3, i32 128) tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %0, i32 256) ret void } @@ -53,23 +71,62 @@ declare <256 x double> @llvm.ve.vl.vmv.vsvvl(i32, <256 x double>, <256 x double>, i32) ; Function Attrs: nounwind -define void @vmv_vsvmvl(i8* %0, i32 signext %1) { -; CHECK-LABEL: vmv_vsvmvl: +define void @vmv_vsvvl_imm(i8* %0) { +; CHECK-LABEL: vmv_vsvvl_imm: ; CHECK: # %bb.0: ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 ; CHECK-NEXT: lea %s2, 128 ; CHECK-NEXT: lvl %s2 -; CHECK-NEXT: vmv %v0, 31, %v0, %vm1 +; CHECK-NEXT: vmv %v0, 31, %v0 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + %3 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvvl(i32 31, <256 x double> %2, <256 x double> %2, i32 128) + tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, i8* %0, i32 256) + ret void +} + +; Function Attrs: nounwind +define void @vmv_vsvmvl(i8* %0, i32 signext %1) { +; CHECK-LABEL: vmv_vsvmvl: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea %s3, 128 +; CHECK-NEXT: lvl %s3 +; CHECK-NEXT: vmv %v0, %s1, %v0, %vm1 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vst %v0, 8, %s0 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) - %4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32 31, <256 x double> %3, <256 x i1> undef, <256 x double> %3, i32 128) + %4 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32 %1, <256 x double> %3, <256 x i1> undef, <256 x double> %3, i32 128) tail call void @llvm.ve.vl.vst.vssl(<256 x double> %4, i64 8, i8* %0, i32 256) ret void } ; Function Attrs: nounwind readnone declare <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32, <256 x double>, <256 x i1>, <256 x double>, i32) + +; Function Attrs: nounwind +define void @vmv_vsvmvl_imm(i8* %0) { +; CHECK-LABEL: vmv_vsvmvl_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vld %v0, 8, %s0 +; CHECK-NEXT: lea %s2, 128 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vmv %v0, 31, %v0, %vm1 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vst %v0, 8, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) + %3 = tail call fast <256 x double> @llvm.ve.vl.vmv.vsvmvl(i32 31, <256 x double> %2, <256 x i1> undef, <256 x double> %2, i32 128) + tail call void @llvm.ve.vl.vst.vssl(<256 x double> %3, i64 8, i8* %0, i32 256) + ret void +}