diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp
--- a/llvm/lib/Target/VE/LVLGen.cpp
+++ b/llvm/lib/Target/VE/LVLGen.cpp
@@ -68,6 +68,12 @@
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
     MachineBasicBlock::iterator MI = I;
 
+    // Check whether MI uses a vector length operand. If so, we prepare the VL
+    // register. We would like to reuse the VL register as much as possible,
+    // and to keep the number of LEA instructions as low as possible.
+    // Therefore, we use a regular scalar register to hold the immediate value
+    // loaded into the VL register, and we try to reuse identical scalar
+    // registers to avoid generating new LVLr instructions where possible.
     unsigned Reg = getVL(*MI);
     if (Reg != VE::NoRegister) {
       LLVM_DEBUG(dbgs() << "Vector instruction found: ");
@@ -78,6 +84,8 @@
                         << ". ");
 
       if (!HasRegForVL || RegForVL != Reg) {
+        // MI uses VL, but its value is in a different scalar register, so
+        // generate a new LVL instruction just before the current instruction.
         LLVM_DEBUG(dbgs() << "Generate a LVL instruction to load "
                           << RegName(Reg) << ".\n");
         BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
@@ -87,18 +95,15 @@
       } else {
         LLVM_DEBUG(dbgs() << "Reuse current VL.\n");
       }
-    } else if (HasRegForVL) {
-      // Old VL is overwritten, so disable HasRegForVL.
-      if (MI->findRegisterDefOperandIdx(RegForVL, false, false, TRI) != -1) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
-        LLVM_DEBUG(MI->dump());
-        HasRegForVL = false;
-      }
     }
+    // Check whether the scalar register holding the current VL value is
+    // clobbered. Note that a call doesn't preserve the VL register either.
     if (HasRegForVL) {
-      // The latest VL is killed, so disable HasRegForVL.
-      if (MI->killsRegister(RegForVL, TRI)) {
-        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " is killed: ");
+      if (MI->definesRegister(RegForVL, TRI) ||
+          MI->modifiesRegister(RegForVL, TRI) ||
+          MI->killsRegister(RegForVL, TRI) || MI->isCall()) {
+        // The latest VL needs to be updated, so disable HasRegForVL.
+        LLVM_DEBUG(dbgs() << RegName(RegForVL) << " needs to be updated: ");
         LLVM_DEBUG(MI->dump());
         HasRegForVL = false;
       }
diff --git a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
--- a/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
+++ b/llvm/test/CodeGen/VE/VELIntrinsics/lvlgen.ll
@@ -42,7 +42,6 @@
 
 ; Check that no redundant 'lvl' is inserted when vector length does not change
 ; in a basic block.
-
 ; Function Attrs: nounwind
 define void @stable_vl(i32 %evl, i8* %P, i8* %Q) {
 ; CHECK-LABEL: stable_vl:
@@ -64,3 +63,43 @@
   tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
   ret void
 }
+
+;;; Check the case where we have a call in the middle of vector instructions.
+
+; Function Attrs: nounwind
+define void @call_invl(i32 %evl, i8* %P, i8* %Q) {
+; CHECK-LABEL: call_invl:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: st %s18, 288(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT: st %s19, 296(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT: st %s20, 304(, %s11) # 8-byte Folded Spill
+; CHECK-NEXT: or %s18, 0, %s1
+; CHECK-NEXT: and %s20, %s0, (32)0
+; CHECK-NEXT: lvl %s20
+; CHECK-NEXT: vld %v0, 8, %s1
+; CHECK-NEXT: or %s19, 0, %s2
+; CHECK-NEXT: vst %v0, 16, %s2
+; CHECK-NEXT: lea %s0, fun@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0)
+; CHECK-NEXT: bsic %s10, (, %s12)
+; CHECK-NEXT: lvl %s20
+; CHECK-NEXT: vld %v0, 16, %s18
+; CHECK-NEXT: vst %v0, 16, %s19
+; CHECK-NEXT: vld %v0, 8, %s18
+; CHECK-NEXT: vst %v0, 16, %s19
+; CHECK-NEXT: ld %s20, 304(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s19, 296(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT: ld %s18, 288(, %s11) # 8-byte Folded Reload
+; CHECK-NEXT: or %s11, 0, %s9
+  %l0 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l0, i64 16, i8* %Q, i32 %evl)
+  call void @fun()
+  %l1 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 16, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l1, i64 16, i8* %Q, i32 %evl)
+  %l2 = tail call <256 x double> @llvm.ve.vl.vld.vssl(i64 8, i8* %P, i32 %evl)
+  tail call void @llvm.ve.vl.vst.vssl(<256 x double> %l2, i64 16, i8* %Q, i32 %evl)
+  ret void
+}
+
+declare void @fun()
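
Note (not part of the patch): for readers who want the resulting control flow in one place, the per-instruction scan in LVLGen.cpp after this change reads roughly as sketched below. This is a reconstruction from the hunks above, with LLVM_DEBUG output elided; the surrounding state (TII, TRI, HasRegForVL, RegForVL) and the getVL() helper come from the rest of the file and are taken as given.

  // Sketch only: reconstructed from the hunks above, not verbatim source.
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
    MachineBasicBlock::iterator MI = I;

    // If MI takes a vector length operand, make sure VL holds the right
    // value; emit LVLr only when the cached scalar register differs.
    unsigned Reg = getVL(*MI);
    if (Reg != VE::NoRegister) {
      if (!HasRegForVL || RegForVL != Reg) {
        BuildMI(MBB, I, MI->getDebugLoc(), TII->get(VE::LVLr)).addReg(Reg);
        HasRegForVL = true;
        RegForVL = Reg;
      }
    }

    // Invalidate the cached VL whenever the scalar register backing it is
    // defined, modified, or killed, or when MI is a call, since a call
    // does not preserve the VL register.
    if (HasRegForVL &&
        (MI->definesRegister(RegForVL, TRI) ||
         MI->modifiesRegister(RegForVL, TRI) ||
         MI->killsRegister(RegForVL, TRI) || MI->isCall()))
      HasRegForVL = false;

    ++I;
  }

The new @call_invl test exercises the isCall() half of this condition: the second "lvl %s20" after "bsic %s10, (, %s12)" is the LVLr that gets reinserted because the call cleared HasRegForVL.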