diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13019,13 +13019,12 @@ return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); } -/// Combines a node carrying the intrinsic `aarch64_sve_prf_gather` into a -/// node that uses `aarch64_sve_prf_gather_scaled_uxtw` when the scalar -/// offset passed to `aarch64_sve_prf_gather` is not a valid immediate for -/// the sve gather prefetch instruction with vector plus immediate addressing -/// mode. +/// Combines a node carrying the intrinsic +/// `aarch64_sve_prf_gather_scalar_offset` into a node that uses +/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to +/// `aarch64_sve_prf_gather_scalar_offset` is not a valid immediate for the +/// sve gather prefetch instruction with vector plus immediate addressing mode. static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, - unsigned NewIID, unsigned ScalarSizeInBytes) { const unsigned ImmPos = 4, OffsetPos = 3; // No need to combine the node if the immediate is valid... @@ -13035,10 +13034,11 @@ // ...otherwise swap the offset base with the offset... SmallVector Ops(N->op_begin(), N->op_end()); std::swap(Ops[ImmPos], Ops[OffsetPos]); - // ...and remap the intrinsic `aarch64_sve_prf_gather` to - // `aarch64_sve_prf_gather_scaled_uxtw`. + // ...and remap the intrinsic `aarch64_sve_prf_gather_scalar_offset` to + // `aarch64_sve_prfb_gather_uxtw_index`. SDLoc DL(N); - Ops[1] = DAG.getConstant(NewIID, DL, MVT::i64); + Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL, + MVT::i64); return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); } @@ -13108,21 +13108,13 @@ case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { case Intrinsic::aarch64_sve_prfb_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfb_gather_uxtw_index, - 1 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfh_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfh_gather_uxtw_index, - 2 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfw_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfw_gather_uxtw_index, - 4 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfd_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfd_gather_uxtw_index, - 8 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfb_gather_uxtw_index: case Intrinsic::aarch64_sve_prfb_gather_sxtw_index: case Intrinsic::aarch64_sve_prfh_gather_uxtw_index: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll @@ -59,7 +59,7 @@ ; PRFH , , [.S{, #}] -> 32-bit element, imm = 0, 2, ..., 62 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset: -; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 %offset, i32 1) ret void @@ -68,7 +68,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #63 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 63, i32 1) ret void @@ -77,7 +77,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 -1, i32 1) ret void @@ -86,7 +86,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 33, i32 1) ret void @@ -95,8 +95,8 @@ ; PRFH , , [.D{, #}] -> 64-bit element, imm = 0, 2, ..., 62 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset: -; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.d, uxtw #1] -; CHECK-NEXT: ret +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw] +; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 %offset, i32 1) ret void } @@ -104,7 +104,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #63 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.d, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 63, i32 1) ret void @@ -113,7 +113,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 -1, i32 1) ret void @@ -122,7 +122,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 33, i32 1) ret void @@ -133,7 +133,7 @@ ; PRFW , , [.S{, #}] -> 32-bit element, imm = 0, 4, ..., 124 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset: -; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 %offset, i32 1) ret void @@ -142,7 +142,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 125, i32 1) ret void @@ -151,7 +151,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 -1, i32 1) ret void @@ -160,7 +160,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 33, i32 1) ret void @@ -169,7 +169,7 @@ ; PRFW , , [.D{, #}] -> 64-bit element, imm = 0, 4, ..., 124 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset: -; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 %offset, i32 1) ret void @@ -178,7 +178,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 125, i32 1) ret void @@ -187,7 +187,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 -1, i32 1) ret void @@ -196,7 +196,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 33, i32 1) ret void @@ -207,7 +207,7 @@ ; PRFD , , [.S{, #}] -> 32-bit element, imm = 0, 8, ..., 248 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset: -; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 %offset, i32 1) ret void @@ -216,7 +216,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 125, i32 1) ret void @@ -225,7 +225,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 -1, i32 1) ret void @@ -234,7 +234,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 33, i32 1) ret void @@ -243,7 +243,7 @@ ; PRFD , , [.D{, #}] -> 64-bit element, imm = 0, 4, ..., 248 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset: -; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 %offset, i32 1) ret void @@ -252,7 +252,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 125, i32 1) ret void @@ -261,7 +261,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 -1, i32 1) ret void @@ -270,7 +270,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 33, i32 1) ret void