diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
--- a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
+++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td
@@ -84,3 +84,7 @@
 let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : GCCBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : GCCBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
 let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : GCCBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : GCCBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : GCCBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType], [LLVMType, LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_svm_sms : GCCBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
+let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : GCCBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType], [LLVMType, LLVMType], [IntrNoMem]>;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -717,6 +717,16 @@
   return GlobalBaseReg;
 }
 
+/// Return the VM register used for the upper half of VM512 register \p reg.
+/// Computed as VM0 + 2 * (reg - VMP0), so it assumes consecutive numbering.
+static Register getVM512Upper(Register reg) {
+  return (reg - VE::VMP0) * 2 + VE::VM0;
+}
+
+/// Return the VM register used for the lower half of VM512 register \p reg,
+/// i.e. the register numbered one past getVM512Upper(reg).
+static Register getVM512Lower(Register reg) { return getVM512Upper(reg) + 1; }
+
 bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   switch (MI.getOpcode()) {
   case VE::EXTEND_STACK: {
@@ -729,6 +739,92 @@
   case VE::GETSTACKTOP: {
     return expandGetStackTopPseudo(MI);
   }
+
+  // Lower the VM512 LVM pseudos to the matching real LVM on the VM half that
+  // the element index addresses.
+  case VE::LVMyir:
+  case VE::LVMyim:
+  case VE::LVMyir_y:
+  case VE::LVMyim_y: {
+    Register VMXu = getVM512Upper(MI.getOperand(0).getReg());
+    Register VMXl = getVM512Lower(MI.getOperand(0).getReg());
+    int64_t Imm = MI.getOperand(1).getImm();
+    bool IsSrcReg =
+        MI.getOpcode() == VE::LVMyir || MI.getOpcode() == VE::LVMyir_y;
+    Register Src = IsSrcReg ? MI.getOperand(2).getReg() : VE::NoRegister;
+    int64_t MImm = IsSrcReg ? 0 : MI.getOperand(2).getImm();
+    bool KillSrc = IsSrcReg ? MI.getOperand(2).isKill() : false;
+    // Indices below 4 address the lower VM register; larger ones address the
+    // upper register and are rebased by 4.
+    Register VMX = VMXl;
+    if (Imm >= 4) {
+      VMX = VMXu;
+      Imm -= 4;
+    }
+    MachineBasicBlock *MBB = MI.getParent();
+    DebugLoc DL = MI.getDebugLoc();
+    switch (MI.getOpcode()) {
+    case VE::LVMyir:
+      BuildMI(*MBB, MI, DL, get(VE::LVMir))
+          .addDef(VMX)
+          .addImm(Imm)
+          .addReg(Src, getKillRegState(KillSrc));
+      break;
+    case VE::LVMyim:
+      BuildMI(*MBB, MI, DL, get(VE::LVMim))
+          .addDef(VMX)
+          .addImm(Imm)
+          .addImm(MImm);
+      break;
+    case VE::LVMyir_y:
+      assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() &&
+             "LVMyir_y has different register in 3rd operand");
+      BuildMI(*MBB, MI, DL, get(VE::LVMir_m))
+          .addDef(VMX)
+          .addImm(Imm)
+          .addReg(Src, getKillRegState(KillSrc))
+          .addReg(VMX);
+      break;
+    case VE::LVMyim_y:
+      assert(MI.getOperand(0).getReg() == MI.getOperand(3).getReg() &&
+             "LVMyim_y has different register in 3rd operand");
+      BuildMI(*MBB, MI, DL, get(VE::LVMim_m))
+          .addDef(VMX)
+          .addImm(Imm)
+          .addImm(MImm)
+          .addReg(VMX);
+      break;
+    }
+    MI.eraseFromParent();
+    return true;
+  }
+  // Lower the VM512 SVM pseudo to a real SVM reading the addressed VM half.
+  case VE::SVMyi: {
+    Register Dest = MI.getOperand(0).getReg();
+    Register VMZu = getVM512Upper(MI.getOperand(1).getReg());
+    Register VMZl = getVM512Lower(MI.getOperand(1).getReg());
+    bool KillSrc = MI.getOperand(1).isKill();
+    int64_t Imm = MI.getOperand(2).getImm();
+    // Same half selection as for the LVM pseudos.
+    Register VMZ = VMZl;
+    if (Imm >= 4) {
+      VMZ = VMZu;
+      Imm -= 4;
+    }
+    MachineBasicBlock *MBB = MI.getParent();
+    DebugLoc DL = MI.getDebugLoc();
+    MachineInstrBuilder MIB =
+        BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm);
+    MachineInstr *Inst = MIB.getInstr();
+    // Copy the kill of the VM512 source onto the new instruction before MI is
+    // erased; MI's operands must not be touched once it has been deleted.
+    if (KillSrc) {
+      const TargetRegisterInfo *TRI = &getRegisterInfo();
+      Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true);
+    }
+    MI.eraseFromParent();
+    return true;
+  }
   }
   return false;
 }
diff --git a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
--- a/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
+++ b/llvm/lib/Target/VE/VEInstrIntrinsicVL.gen.td
@@ -162,3 +162,7 @@
 def : Pat<(int_ve_vl_pfchv_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVirl (LO7 $I), i64:$sz, i32:$vl)>;
 def : Pat<(int_ve_vl_pfchvnc_ssl i64:$sy, i64:$sz, i32:$vl), (PFCHVNCrrl i64:$sy, i64:$sz, i32:$vl)>;
 def : Pat<(int_ve_vl_pfchvnc_ssl simm7:$I, i64:$sz, i32:$vl), (PFCHVNCirl (LO7 $I), i64:$sz, i32:$vl)>;
+def : Pat<(int_ve_vl_lvm_mmss v256i1:$ptm, uimm6:$N, i64:$sz), (LVMir_m (ULO7 $N), i64:$sz, v256i1:$ptm)>;
+def : Pat<(int_ve_vl_lvm_MMss v512i1:$ptm, uimm6:$N, i64:$sz), (LVMyir_y (ULO7 $N), i64:$sz, v512i1:$ptm)>;
+def : Pat<(int_ve_vl_svm_sms v256i1:$vmz, uimm6:$N), (SVMmi v256i1:$vmz, (ULO7 $N))>;
+def : Pat<(int_ve_vl_svm_sMs v512i1:$vmz, uimm6:$N), (SVMyi v512i1:$vmz, (ULO7 $N))>;
diff --git a/llvm/lib/Target/VE/VEInstrVec.td b/llvm/lib/Target/VE/VEInstrVec.td
--- a/llvm/lib/Target/VE/VEInstrVec.td
+++ b/llvm/lib/Target/VE/VEInstrVec.td
@@ -2,6 +2,26 @@
 // Vector Instructions
 //===----------------------------------------------------------------------===//
 
+// Pseudo instructions for VM512 modifications
+//
+// Specifies hasSideEffects = 0 to disable UnmodeledSideEffects.
+ +let hasSideEffects = 0 in { + let Constraints = "$vx = $vd", DisableEncoding = "$vd" in { + def LVMyir_y : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz, VM512:$vd), + "# pseudo LVM $vx, $sy, $sz, $vd">; + def LVMyim_y : Pseudo<(outs VM512:$vx), + (ins uimm3:$sy, mimm:$sz, VM512:$vd), + "# pseudo LVM $vx, $sy, $sz, $vd">; + } + def LVMyir : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, I64:$sz), + "# pseudo LVM $vx, $sy, $sz">; + def LVMyim : Pseudo<(outs VM512:$vx), (ins uimm3:$sy, mimm:$sz), + "# pseudo LVM $vx, $sy, $sz">; + def SVMyi : Pseudo<(outs I64:$sx), (ins VM512:$vz, uimm3:$sy), + "# pseudo SVM $sx, $vz, $sy">; +} + //===----------------------------------------------------------------------===// // Instructions // diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp --- a/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -86,10 +86,22 @@ ++ItAlias) Reserved.set(*ItAlias); + // Reserve constant registers. + Reserved.set(VE::VM0); + Reserved.set(VE::VMP0); + return Reserved; } -bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { return false; } +bool VERegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { + switch (PhysReg) { + case VE::VM0: + case VE::VMP0: + return true; + default: + return false; + } +} const TargetRegisterClass * VERegisterInfo::getPointerRegClass(const MachineFunction &MF, diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lvm.ll b/llvm/test/CodeGen/VE/VELIntrinsics/lvm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/VELIntrinsics/lvm.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s + +;;; Test load/save vector mask intrinsic instructions +;;; +;;; Note: +;;; We test LVMir_m, LVMyir_y, SVMmi, and SVMyi instructions. 
+ +; Function Attrs: nounwind readnone +define i64 @lvm_mmss(i8* nocapture readnone %0, i64 %1) { +; CHECK-LABEL: lvm_mmss: +; CHECK: # %bb.0: +; CHECK-NEXT: lvm %vm1, 3, %s1 +; CHECK-NEXT: svm %s0, %vm1, 3 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1> undef, i64 3, i64 %1) + %4 = tail call i64 @llvm.ve.vl.svm.sms(<256 x i1> %3, i64 3) + ret i64 %4 +} + +; Function Attrs: nounwind readnone +declare <256 x i1> @llvm.ve.vl.lvm.mmss(<256 x i1>, i64, i64) + +; Function Attrs: nounwind readnone +declare i64 @llvm.ve.vl.svm.sms(<256 x i1>, i64) + +; Function Attrs: nounwind readnone +define i64 @lvml_MMss(i8* nocapture readnone %0, i64 %1) { +; CHECK-LABEL: lvml_MMss: +; CHECK: # %bb.0: +; CHECK-NEXT: lvm %vm2, 1, %s1 +; CHECK-NEXT: svm %s0, %vm3, 3 +; CHECK-NEXT: svm %s1, %vm2, 2 +; CHECK-NEXT: adds.l %s0, %s1, %s0 +; CHECK-NEXT: b.l.t (, %s10) + %3 = tail call <512 x i1> @llvm.ve.vl.lvm.MMss(<512 x i1> undef, i64 5, i64 %1) + %4 = tail call i64 @llvm.ve.vl.svm.sMs(<512 x i1> %3, i64 3) + %5 = tail call i64 @llvm.ve.vl.svm.sMs(<512 x i1> %3, i64 6) + %6 = add i64 %5, %4 + ret i64 %6 +} + +; Function Attrs: nounwind readnone +declare <512 x i1> @llvm.ve.vl.lvm.MMss(<512 x i1>, i64, i64) + +; Function Attrs: nounwind readnone +declare i64 @llvm.ve.vl.svm.sMs(<512 x i1>, i64) diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll b/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll --- a/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll +++ b/llvm/test/CodeGen/VE/VELIntrinsics/vst.ll @@ -32,7 +32,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vst.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -63,7 +63,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld 
%v0, 8, %s0 -; CHECK-NEXT: vst %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vst.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -94,7 +94,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst.nc %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -125,7 +125,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst.nc %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -156,7 +156,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -187,7 +187,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -218,7 +218,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst.nc.ot %v0, %s1, %s0, 
%vm0 +; CHECK-NEXT: vst.nc.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -249,7 +249,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst.nc.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -280,7 +280,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -311,7 +311,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -342,7 +342,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu.nc %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstunc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -373,7 +373,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu.nc %v0, 
8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstunc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -404,7 +404,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstuot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -435,7 +435,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstuot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -466,7 +466,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu.nc.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstuncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -497,7 +497,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu.nc.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstuncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -528,7 +528,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl %v0, %s1, %s0, %vm1 
; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -559,7 +559,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -590,7 +590,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl.nc %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstlnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -621,7 +621,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl.nc %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstlnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -652,7 +652,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstlot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -683,7 +683,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = 
tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstlot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -714,7 +714,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl.nc.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstlncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -745,7 +745,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl.nc.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstlncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -776,7 +776,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst2d %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst2d %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2d.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -807,7 +807,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst2d %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst2d %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2d.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -838,7 +838,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst2d.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst2d.nc %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast 
<256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2dnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -869,7 +869,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst2d.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst2d.nc %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2dnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -900,7 +900,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst2d.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst2d.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2dot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -931,7 +931,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst2d.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst2d.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2dot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -962,7 +962,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vst2d.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vst2d.nc.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2dncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -993,7 +993,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vst2d.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vst2d.nc.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call 
fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vst2dncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1024,7 +1024,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu2d %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu2d %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2d.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1055,7 +1055,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu2d %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu2d %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2d.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1086,7 +1086,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu2d.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu2d.nc %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2dnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1117,7 +1117,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu2d.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu2d.nc %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2dnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1148,7 +1148,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu2d.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu2d.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail 
call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2dot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1179,7 +1179,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu2d.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu2d.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2dot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1210,7 +1210,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstu2d.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstu2d.nc.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2dncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1241,7 +1241,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstu2d.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstu2d.nc.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstu2dncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1272,7 +1272,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl2d %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl2d %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2d.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1303,7 +1303,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl2d %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl2d %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, 
%s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2d.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1334,7 +1334,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl2d.nc %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl2d.nc %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2dnc.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1365,7 +1365,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl2d.nc %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl2d.nc %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2dnc.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1396,7 +1396,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl2d.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl2d.ot %v0, %s1, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2dot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1427,7 +1427,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl2d.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl2d.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2dot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256) @@ -1458,7 +1458,7 @@ ; CHECK-NEXT: lea %s2, 256 ; CHECK-NEXT: lvl %s2 ; CHECK-NEXT: vld %v0, %s1, %s0 -; CHECK-NEXT: vstl2d.nc.ot %v0, %s1, %s0, %vm0 +; CHECK-NEXT: vstl2d.nc.ot %v0, %s1, %s0, %vm1 ; 
CHECK-NEXT: b.l.t (, %s10) %3 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 %1, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2dncot.vssml(<256 x double> %3, i64 %1, i8* %0, <256 x i1> undef, i32 256) @@ -1489,7 +1489,7 @@ ; CHECK-NEXT: lea %s1, 256 ; CHECK-NEXT: lvl %s1 ; CHECK-NEXT: vld %v0, 8, %s0 -; CHECK-NEXT: vstl2d.nc.ot %v0, 8, %s0, %vm0 +; CHECK-NEXT: vstl2d.nc.ot %v0, 8, %s0, %vm1 ; CHECK-NEXT: b.l.t (, %s10) %2 = tail call fast <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %0, i32 256) tail call void @llvm.ve.vl.vstl2dncot.vssml(<256 x double> %2, i64 8, i8* %0, <256 x i1> undef, i32 256)