Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14558,6 +14558,57 @@
   return SDValue();
 }
 
+static int64_t getShiftImm(SDNode *Shift) {
+  if (auto *Imm = dyn_cast<ConstantSDNode>(Shift->getOperand(1)))
+    return Imm->getSExtValue();
+  return 0;
+}
+
+static SDValue performShiftCombine(SDNode *N, SelectionDAG &DAG) {
+  // Match ({VASHR|VLSHR} (VSHL (VLSHR Op X) X) X)
+  // This can be folded to just ({VASHR|VLSHR} Op X)
+  SDNode *Shift1 = N;
+  unsigned Shift1Opc = Shift1->getOpcode();
+  if (Shift1Opc != AArch64ISD::VASHR && Shift1Opc != AArch64ISD::VLSHR)
+    return SDValue();
+
+  SDNode *Shift2 = Shift1->getOperand(0).getNode();
+  unsigned Shift2Opc = Shift2->getOpcode();
+  if (Shift2Opc != AArch64ISD::VSHL)
+    return SDValue();
+
+  SDNode *Shift3 = Shift2->getOperand(0).getNode();
+  unsigned Shift3Opc = Shift3->getOpcode();
+  if (Shift3Opc != AArch64ISD::VLSHR)
+    return SDValue();
+
+  // Check that all instructions shift by the same number of bits.
+  if (int64_t Shift1Imm = getShiftImm(Shift1)) {
+    // Negative shifts are not supported.
+    if (Shift1Imm < 0)
+      return SDValue();
+    if (Shift1Imm != getShiftImm(Shift2) || Shift1Imm != getShiftImm(Shift3))
+      return SDValue();
+  } else {
+    // Shift by a non-constant or zero.
+    return SDValue();
+  }
+
+  // Check that there are no other uses of the inner shift instructions.
+  for (SDNode *User : Shift2->uses()) {
+    if (User != Shift1)
+      return SDValue();
+  }
+  for (SDNode *User : Shift3->uses()) {
+    if (User != Shift2)
+      return SDValue();
+  }
+
+  SDValue Ops[] = {Shift3->getOperand(0), Shift1->getOperand(1)};
+  return DAG.getNode(Shift1->getOpcode(), SDLoc(Shift1),
+                     Shift1->getValueType(0), Ops);
+}
+
 /// Target-specific DAG combine function for post-increment LD1 (lane) and
 /// post-increment LD1R.
 static SDValue performPostLD1Combine(SDNode *N,
@@ -16034,6 +16085,9 @@
   case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
   case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
     return performGLD1Combine(N, DAG);
+  case AArch64ISD::VASHR:
+  case AArch64ISD::VLSHR:
+    return performShiftCombine(N, DAG);
   case ISD::INSERT_VECTOR_ELT:
     return performInsertVectorEltCombine(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT:
Index: llvm/test/CodeGen/AArch64/aarch64-bswap-ext.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bswap-ext.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O2 -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define <2 x i32> @test1(<2 x i16> %v2i16) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev32 v0.8b, v0.8b
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    ret
+  %v2i16_rev = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %v2i16)
+  %v2i32 = sext <2 x i16> %v2i16_rev to <2 x i32>
+  ret <2 x i32> %v2i32
+}
+
+define <2 x float> @test2(<2 x i16> %v2i16) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rev32 v0.8b, v0.8b
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    scvtf v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %v2i16_rev = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %v2i16)
+  %v2f32 = sitofp <2 x i16> %v2i16_rev to <2 x float>
+  ret <2 x float> %v2f32
+}
+
+declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone
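
The combine above rests on the algebraic identity that a logical-right / left / right shift round trip by the same amount X collapses to a single right shift by X. The following standalone C++ check is not part of the patch; it is a sketch that exhaustively verifies that identity for one 16-bit lane, assuming VLSHR, VSHL and VASHR denote per-lane logical-right, left and arithmetic-right shifts by an immediate, and assuming the usual arithmetic behaviour of >> on signed integers (guaranteed since C++20).

// Standalone sanity check (not part of the patch): for a single 16-bit lane,
// exhaustively verifies the identity performShiftCombine relies on:
//   lshr((lshr(Op, X) << X), X) == lshr(Op, X)
//   ashr((lshr(Op, X) << X), X) == ashr(Op, X)
// Assumes `>>` on signed integers is an arithmetic shift (guaranteed in C++20,
// and what mainstream compilers do in earlier modes as well).
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 1; X < 16; ++X) {
    for (uint32_t V = 0; V <= 0xFFFF; ++V) {
      uint16_t Op = static_cast<uint16_t>(V);
      uint16_t Inner = static_cast<uint16_t>(Op >> X);   // VLSHR Op X
      uint16_t Shl = static_cast<uint16_t>(Inner << X);  // VSHL ... X
      // Outer VLSHR: the round trip reproduces the inner logical shift.
      assert(static_cast<uint16_t>(Shl >> X) == Inner);
      // Outer VASHR: the round trip matches a direct arithmetic shift of Op.
      int16_t AShr = static_cast<int16_t>(static_cast<int16_t>(Op) >> X);
      assert(static_cast<int16_t>(static_cast<int16_t>(Shl) >> X) == AShr);
    }
  }
  std::printf("shift round-trip identity holds for all 16-bit lanes\n");
  return 0;
}

This mirrors the matched DAG shape directly: Inner corresponds to the innermost VLSHR, Shl to the VSHL, and the asserted expressions to the outer VLSHR/VASHR that the combine replaces with a single shift of the original operand.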