Diff 477675

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,490 Lines • ▼ Show 20 Lines	case Intrinsic::riscv_masked_strided_load: {
auto *Load = cast<MemIntrinsicSDNode>(Op);		auto *Load = cast<MemIntrinsicSDNode>(Op);
SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;		SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
SDValue Ptr = Op.getOperand(3);		SDValue Ptr = Op.getOperand(3);
SDValue Stride = Op.getOperand(4);		SDValue Stride = Op.getOperand(4);
SDValue Result, Chain;		SDValue Result, Chain;

// TODO: We restrict this to unmasked loads currently in consideration of		// TODO: We restrict this to unmasked loads currently in consideration of
// the complexity of handling all-false masks.		// the complexity of handling all-false masks.
if (IsUnmasked && isNullConstant(Stride) &&		if (IsUnmasked && isNullConstant(Stride)) {
!Subtarget.hasOptimizedZeroStrideLoad()) {
MVT ScalarVT = ContainerVT.getVectorElementType();		MVT ScalarVT = ContainerVT.getVectorElementType();
SDValue ScalarLoad =		SDValue ScalarLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,		DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
ScalarVT, Load->getMemOperand());		ScalarVT, Load->getMemOperand());
Chain = ScalarLoad.getValue(1);		Chain = ScalarLoad.getValue(1);
Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,		Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
Subtarget);		Subtarget);
} else {		} else {
▲ Show 20 Lines • Show All 7,938 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll

Show First 20 Lines • Show All 171 Lines • ▼ Show 20 Lines	vector.body: ; preds = %vector.body, %entry
%6 = icmp eq i64 %index.next, 1024		%6 = icmp eq i64 %index.next, 1024
br i1 %6, label %for.cond.cleanup, label %vector.body		br i1 %6, label %for.cond.cleanup, label %vector.body

for.cond.cleanup: ; preds = %vector.body		for.cond.cleanup: ; preds = %vector.body
ret void		ret void
}		}

define void @gather_zero_stride(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) {		define void @gather_zero_stride(i8* noalias nocapture %A, i8* noalias nocapture readonly %B) {
;		;
; V-LABEL: gather_zero_stride:		; CHECK-LABEL: gather_zero_stride:
		reames: This looks like a regression.
		pcwang-thead (Author): The diff rendering looks odd, but I think the result is what we expected. I added a new test, `gather_zero_stride_unfold` (copied from `gather_zero_stride`), in which I changed `%4 = add <32 x i8> %wide.load, %wide.masked.gather` to `%4 = udiv <32 x i8> %wide.masked.gather, %wide.load` so that the splat can't be folded, since division is not commutative. So the output there contains a scalar load (`lbu a5, 0(a1)`) and a vector splat (`vmv.v.x v9, a5`).
		reames: I got confused here by the diff. I was responding as if this was the optimized check, which it isn't. So ignore me here.
; V: # %bb.0: # %entry		; CHECK: # %bb.0: # %entry
; V-NEXT: li a2, 0		; CHECK-NEXT: li a2, 0
; V-NEXT: li a3, 32		; CHECK-NEXT: li a3, 32
; V-NEXT: li a4, 1024		; CHECK-NEXT: li a4, 1024
; V-NEXT: .LBB3_1: # %vector.body		; CHECK-NEXT: .LBB3_1: # %vector.body
; V-NEXT: # =>This Inner Loop Header: Depth=1		; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma		; CHECK-NEXT: lbu a5, 0(a1)
; V-NEXT: vlse8.v v8, (a1), zero		; CHECK-NEXT: add a6, a0, a2
; V-NEXT: add a5, a0, a2		; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; V-NEXT: vle8.v v9, (a5)		; CHECK-NEXT: vle8.v v8, (a6)
; V-NEXT: vadd.vv v8, v9, v8		; CHECK-NEXT: vadd.vx v8, v8, a5
; V-NEXT: vse8.v v8, (a5)		; CHECK-NEXT: vse8.v v8, (a6)
; V-NEXT: addi a2, a2, 32		; CHECK-NEXT: addi a2, a2, 32
; V-NEXT: addi a1, a1, 160		; CHECK-NEXT: addi a1, a1, 160
; V-NEXT: bne a2, a4, .LBB3_1		; CHECK-NEXT: bne a2, a4, .LBB3_1
; V-NEXT: # %bb.2: # %for.cond.cleanup		; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
; V-NEXT: ret		; CHECK-NEXT: ret
;
; ZVE32F-LABEL: gather_zero_stride:
; ZVE32F: # %bb.0: # %entry
; ZVE32F-NEXT: li a2, 0
; ZVE32F-NEXT: li a3, 32
; ZVE32F-NEXT: li a4, 1024
; ZVE32F-NEXT: .LBB3_1: # %vector.body
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; ZVE32F-NEXT: vlse8.v v8, (a1), zero
; ZVE32F-NEXT: add a5, a0, a2
; ZVE32F-NEXT: vle8.v v9, (a5)
; ZVE32F-NEXT: vadd.vv v8, v9, v8
; ZVE32F-NEXT: vse8.v v8, (a5)
; ZVE32F-NEXT: addi a2, a2, 32
; ZVE32F-NEXT: addi a1, a1, 160
; ZVE32F-NEXT: bne a2, a4, .LBB3_1
; ZVE32F-NEXT: # %bb.2: # %for.cond.cleanup
; ZVE32F-NEXT: ret
;
; NOT-OPTIMIZED-LABEL: gather_zero_stride:
; NOT-OPTIMIZED: # %bb.0: # %entry
; NOT-OPTIMIZED-NEXT: li a2, 0
; NOT-OPTIMIZED-NEXT: li a3, 32
; NOT-OPTIMIZED-NEXT: li a4, 1024
; NOT-OPTIMIZED-NEXT: .LBB3_1: # %vector.body
; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
; NOT-OPTIMIZED-NEXT: lbu a5, 0(a1)
; NOT-OPTIMIZED-NEXT: add a6, a0, a2
; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; NOT-OPTIMIZED-NEXT: vle8.v v8, (a6)
; NOT-OPTIMIZED-NEXT: vadd.vx v8, v8, a5
; NOT-OPTIMIZED-NEXT: vse8.v v8, (a6)
; NOT-OPTIMIZED-NEXT: addi a2, a2, 32
; NOT-OPTIMIZED-NEXT: addi a1, a1, 160
; NOT-OPTIMIZED-NEXT: bne a2, a4, .LBB3_1
; NOT-OPTIMIZED-NEXT: # %bb.2: # %for.cond.cleanup
; NOT-OPTIMIZED-NEXT: ret
entry:		entry:
br label %vector.body		br label %vector.body

vector.body: ; preds = %vector.body, %entry		vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]		%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%vec.ind = phi <32 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]		%vec.ind = phi <32 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
%0 = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>		%0 = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
%1 = getelementptr inbounds i8, i8* %B, <32 x i64> %0		%1 = getelementptr inbounds i8, i8* %B, <32 x i64> %0
▲ Show 20 Lines • Show All 804 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Lower unmasked zero-stride vector load to (scalar load + splat)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 477675

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Lower unmasked zero-stride vector load to (scalar load + splat)ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 477675

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll

[RISCV] Lower unmasked zero-stride vector load to (scalar load + splat)
ClosedPublic