diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1927,6 +1927,9 @@
   assert(MulImm.getBitWidth() == VT.getSizeInBits() &&
          "APInt size does not match type size!");
 
+  if (MulImm == 0)
+    return getConstant(0, DL, VT);
+
   if (ConstantFold) {
     const MachineFunction &MF = getMachineFunction();
     const Function &F = MF.getFunction();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4192,7 +4192,7 @@
   Type *Ty = I.getType();
   SmallVector<EVT, 4> ValueVTs, MemVTs;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
@@ -4250,9 +4250,14 @@
       ChainI = 0;
     }
 
-    SDValue A = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(Offsets[i]));
-    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
-                            MachinePointerInfo(SV, Offsets[i]), Alignment,
+    // TODO: MachinePointerInfo only supports a fixed length offset.
+    MachinePointerInfo PtrInfo =
+        !Offsets[i].isScalable() || Offsets[i].isZero()
+            ? MachinePointerInfo(SV, Offsets[i].getKnownMinValue())
+            : MachinePointerInfo();
+
+    SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
+    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, PtrInfo, Alignment,
                             MMOFlags, AAInfo, Ranges);
     Chains[ChainI] = L.getValue(1);
@@ -4354,7 +4359,7 @@
   }
 
   SmallVector<EVT, 4> ValueVTs, MemVTs;
-  SmallVector<uint64_t, 4> Offsets;
+  SmallVector<TypeSize, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
                   SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
   unsigned NumValues = ValueVTs.size();
@@ -4385,13 +4390,18 @@
       ChainI = 0;
     }
 
-    SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(Offsets[i]));
+    // TODO: MachinePointerInfo only supports a fixed length offset.
+    MachinePointerInfo PtrInfo =
+        !Offsets[i].isScalable() || Offsets[i].isZero()
+            ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue())
+            : MachinePointerInfo();
+
+    SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]);
     SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
     if (MemVTs[i] != ValueVTs[i])
       Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
     SDValue St =
-        DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
-                     Alignment, MMOFlags, AAInfo);
+        DAG.getStore(Root, dl, Val, Add, PtrInfo, Alignment, MMOFlags, AAInfo);
     Chains[ChainI] = St;
   }
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
@@ -12,16 +12,13 @@
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    mov x8, #2 // =0x2
-; CHECK-NEXT:    mov x9, #4 // =0x4
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov x10, sp
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
 ; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x10, x8, lsl #3]
-; CHECK-NEXT:    st1d { z1.d }, p0, [x10, x9, lsl #3]
 ; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    addvl sp, sp, #3
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -473,7 +473,7 @@
 entry:
   %ptr = getelementptr inbounds i32, i32* %addr, i64 %idx
   %bc = bitcast i32* %ptr to <vscale x 4 x i32>*
-  %ld = load <vscale x 4 x i32>, <vscale x 4 x i32>* %bc, align 16
+  %ld = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %bc, align 16
   %out = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %ld, i64 0)
   ret <4 x i32> %out
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
--- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
@@ -15,12 +15,13 @@
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    sub sp, sp, a2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrrs a2, vlenb, zero
 ; CHECK-NEXT:    vl1re64.v v8, (a0)
-; CHECK-NEXT:    addi a0, a0, 8
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:    vl1re64.v v9, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vs1r.v v8, (a0)
-; CHECK-NEXT:    addi a2, sp, 24
+; CHECK-NEXT:    add a2, a0, a2
 ; CHECK-NEXT:    vs1r.v v9, (a2)
 ; CHECK-NEXT:    vl1re64.v v8, (a2)
 ; CHECK-NEXT:    vl1re64.v v9, (a0)
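
Not part of the patch, just an illustration of the PtrInfo selection added to visitLoad/visitStore above: a fixed (or zero) offset can still be recorded in the MachinePointerInfo, while a non-zero scalable offset has no compile-time byte value, so the pointer info is dropped and only the getObjectPtrOffset address arithmetic carries the vscale scaling. Below is a minimal standalone C++ sketch of that decision, using a hypothetical ScaledOffset struct in place of llvm::TypeSize rather than the LLVM API itself:

```cpp
#include <cstdint>
#include <initializer_list>
#include <iostream>
#include <optional>

// Hypothetical stand-in for llvm::TypeSize: a byte offset that is either a
// plain fixed value or a multiple of the unknown runtime quantity vscale.
struct ScaledOffset {
  uint64_t KnownMinValue; // bytes, or bytes * vscale when Scalable is true
  bool Scalable;
};

// Mirrors the PtrInfo choice in the patch: a fixed or zero offset can be kept
// as a known byte offset; a non-zero scalable offset cannot, so it is dropped
// (the equivalent of falling back to an empty MachinePointerInfo()).
std::optional<uint64_t> foldableOffset(const ScaledOffset &Off) {
  if (!Off.Scalable || Off.KnownMinValue == 0)
    return Off.KnownMinValue;
  return std::nullopt;
}

int main() {
  for (const ScaledOffset &Off :
       {ScaledOffset{16, false},   // fixed field offset: keep 16
        ScaledOffset{0, true},     // first scalable member: keep 0
        ScaledOffset{16, true}}) { // 16 * vscale bytes: drop
    if (std::optional<uint64_t> Fixed = foldableOffset(Off))
      std::cout << "keep offset " << *Fixed << '\n';
    else
      std::cout << "drop offset (scalable, unknown at compile time)\n";
  }
  return 0;
}
```

The same trade-off shows up in the test updates: once the scalable struct-member offsets are kept as TypeSize values instead of being materialized as fixed byte constants, the AArch64 output folds them into [x0, #N, mul vl] addressing and the RISC-V output computes them from vlenb.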