Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -1551,14 +1551,42 @@ return true; } case TargetOpcode::G_EXTRACT: { - LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); - LLT DstTy = MRI.getType(I.getOperand(0).getReg()); + Register DstReg = I.getOperand(0).getReg(); + Register SrcReg = I.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + LLT DstTy = MRI.getType(DstReg); (void)DstTy; unsigned SrcSize = SrcTy.getSizeInBits(); - // Larger extracts are vectors, same-size extracts should be something else - // by now (either split up or simplified to a COPY). - if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32) - return false; + + if (SrcTy.getSizeInBits() > 64) { + // This should be an extract of an s128, which is like a vector extract. + if (SrcTy.getSizeInBits() != 128) + return false; + // Only support extracting 64 bits from an s128 at the moment. + if (DstTy.getSizeInBits() != 64) + return false; + + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); + const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + // Check we have the right regbank always. + assert(SrcRB.getID() == AArch64::FPRRegBankID && + DstRB.getID() == AArch64::FPRRegBankID && + "Wrong extract regbank!"); + + // Emit the same code as a vector extract. + // Offset must be a multiple of 64. + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 64 != 0) + return false; + unsigned LaneIdx = Offset / 64; + MachineIRBuilder MIB(I); + MachineInstr *Extract = emitExtractVectorElt( + DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); + if (!Extract) + return false; + I.eraseFromParent(); + return true; + } I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + @@ -1570,7 +1598,7 @@ return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - Register DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); + DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) .addReg(DstReg, 0, AArch64::sub_32); @@ -1928,6 +1956,16 @@ constrainSelectedInstRegOperands(I, TII, TRI, RBI); return true; } + + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { + MachineIRBuilder MIB(I); + MachineInstr *Extract = emitExtractVectorElt( + DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); + if (!Extract) + return false; + I.eraseFromParent(); + return true; + } } return false; @@ -2590,16 +2628,40 @@ const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); + const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); - // At the moment we only support merging two s32s into an s64. if (I.getNumOperands() != 3) return false; - if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) - return false; - const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); + + // Merging 2 s64s into an s128. + if (DstTy == LLT::scalar(128)) { + if (SrcTy.getSizeInBits() != 64) + return false; + MachineIRBuilder MIB(I); + Register DstReg = I.getOperand(0).getReg(); + Register Src1Reg = I.getOperand(1).getReg(); + Register Src2Reg = I.getOperand(2).getReg(); + auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); + MachineInstr *InsMI = + emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); + if (!InsMI) + return false; + MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), + Src2Reg, /* LaneIdx */ 1, RB, MIB); + if (!Ins2MI) + return false; + constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); + constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); + I.eraseFromParent(); + return true; + } + if (RB.getID() != AArch64::GPRRegBankID) return false; + if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) + return false; + auto *DstRC = &AArch64::GPR64RegClass; Register SubToRegDef = MRI.createVirtualRegister(DstRC); MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), Index: llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -193,14 +193,14 @@ .legalIf([=](const LegalityQuery &Query) { const LLT &Ty0 = Query.Types[0]; const LLT &Ty1 = Query.Types[1]; - if (Ty1 != s32 && Ty1 != s64) + if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128) return false; if (Ty1 == p0) return true; return isPowerOf2_32(Ty0.getSizeInBits()) && (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8); }) - .clampScalar(1, s32, s64) + .clampScalar(1, s32, s128) .widenScalarToNextPow2(1) .maxScalarIf(typeInSet(1, {s32}), 0, s16) .maxScalarIf(typeInSet(1, {s64}), 0, s32) @@ -238,6 +238,7 @@ {s32, p0, 32, 8}, {s64, p0, 64, 8}, {p0, p0, 64, 8}, + {s128, p0, 128, 8}, {v8s8, p0, 64, 8}, {v16s8, p0, 128, 8}, {v4s16, p0, 64, 8}, @@ -267,6 +268,7 @@ {s32, p0, 32, 8}, {s64, p0, 64, 8}, {p0, p0, 64, 8}, + {s128, p0, 128, 8}, {v16s8, p0, 128, 8}, {v4s16, p0, 64, 8}, {v8s16, p0, 128, 8}, Index: llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -635,6 +635,12 @@ // Some of the floating-point instructions have mixed GPR and FPR operands: // fine-tune the computed mapping. switch (Opc) { + case TargetOpcode::G_TRUNC: { + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + break; + } case TargetOpcode::G_SITOFP: case TargetOpcode::G_UITOFP: if (MRI.getType(MI.getOperand(0).getReg()).isVector()) @@ -793,6 +799,15 @@ // Index needs to be a GPR. OpRegBankIdx[3] = PMI_FirstGPR; break; + case TargetOpcode::G_EXTRACT: { + // For s128 sources we have to use fpr. + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + if (SrcTy.getSizeInBits() == 128) { + OpRegBankIdx[0] = PMI_FirstFPR; + OpRegBankIdx[1] = PMI_FirstFPR; + } + break; + } case TargetOpcode::G_BUILD_VECTOR: // If the first source operand belongs to a FPR register bank, then make // sure that we preserve that. Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -205,7 +205,7 @@ ret void } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %4:_(s64) = G_EXTRACT %3:_(s96), 0 (in function: nonpow2_store_narrowing) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: %5:fpr32(s32) = G_EXTRACT %21:fpr(s128), 64 (in function: nonpow2_store_narrowing) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing: define void @nonpow2_store_narrowing(i96* %c) { Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -1,88 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer %s -o - | FileCheck %s ---- -name: test_extracts_1 -body: | - bb.0: - liveins: $w0 - - ; Low part of extraction takes entirity of the low register entirely, so - ; value stored is forwarded directly from first load. - - ; CHECK-LABEL: name: test_extracts_1 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8) - ; CHECK: RET_ReallyLR - %0:_(s64) = COPY $x0 - %1:_(s32) = COPY $w1 - %2:_(p0) = COPY $x2 - %3:_(s128) = G_LOAD %2(p0) :: (load 16) - %4:_(s64) = G_EXTRACT %3(s128), 0 - G_STORE %4(s64), %2(p0) :: (store 8) - RET_ReallyLR -... - ---- -name: test_extracts_2 -body: | - bb.0: - liveins: $w0 - - ; Low extraction wipes takes whole low register. High extraction is real. - ; CHECK-LABEL: name: test_extracts_2 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY $x2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 8, align 16) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s64), 0 - ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8) - ; CHECK: G_STORE [[EXTRACT]](s32), [[COPY2]](p0) :: (store 4) - ; CHECK: RET_ReallyLR - %0:_(s64) = COPY $x0 - %1:_(s32) = COPY $w1 - %2:_(p0) = COPY $x2 - %3:_(s128) = G_LOAD %2(p0) :: (load 16) - %4:_(s64) = G_EXTRACT %3(s128), 0 - %5:_(s32) = G_EXTRACT %3(s128), 64 - G_STORE %4(s64), %2(p0) :: (store 8) - G_STORE %5(s32), %2(p0) :: (store 4) - RET_ReallyLR -... - ---- -name: test_extracts_3 -body: | - bb.0: - liveins: $x0, $x1, $x2 - - - ; CHECK-LABEL: name: test_extracts_3 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) - ; CHECK: $x0 = COPY [[MV]](s64) - ; CHECK: RET_ReallyLR - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s128) = G_MERGE_VALUES %0, %1 - %3:_(s64) = G_EXTRACT %2, 32 - $x0 = COPY %3 - RET_ReallyLR -... --- name: test_extracts_4 Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -1,153 +1,13 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" - define void @test_inserts_1() { ret void } - define void @test_inserts_2() { ret void } - define void @test_inserts_3() { ret void } - define void @test_inserts_4() { ret void } - define void @test_inserts_5() { ret void } - define void @test_inserts_6() { ret void } define void @test_inserts_nonpow2() { ret void } ... --- -name: test_inserts_1 -body: | - bb.0: - liveins: $w0 - - ; Low part of insertion wipes out the old register entirely, so %0 gets - ; forwarded to the G_STORE. Hi part is unchanged so (split) G_LOAD gets - ; forwarded. - ; CHECK-LABEL: name: test_inserts_1 - ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD - ; CHECK: G_STORE %0(s64) - ; CHECK: G_STORE [[HI]] - %0:_(s64) = COPY $x0 - %1:_(s32) = COPY $w1 - %2:_(p0) = COPY $x2 - %3:_(s128) = G_LOAD %2(p0) :: (load 16) - %4:_(s128) = G_INSERT %3(s128), %0(s64), 0 - G_STORE %4(s128), %2(p0) :: (store 16) - RET_ReallyLR -... - ---- -name: test_inserts_2 -body: | - bb.0: - liveins: $w0 - - ; Low insertion wipes out the old register entirely, so %0 gets forwarded - ; to the G_STORE again. Second insertion is real. - ; CHECK-LABEL: name: test_inserts_2 - ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD - ; CHECK: [[NEWHI:%[0-9]+]]:_(s64) = G_INSERT [[HI]], %1(s32), 0 - ; CHECK: G_STORE %0(s64) - ; CHECK: G_STORE [[NEWHI]] - %0:_(s64) = COPY $x0 - %1:_(s32) = COPY $w1 - %2:_(p0) = COPY $x2 - %3:_(s128) = G_LOAD %2(p0) :: (load 16) - %4:_(s128) = G_INSERT %3(s128), %0(s64), 0 - %5:_(s128) = G_INSERT %4(s128), %1(s32), 64 - G_STORE %5(s128), %2(p0) :: (store 16) - RET_ReallyLR -... - ---- -name: test_inserts_3 -body: | - bb.0: - liveins: $w0 - - ; I'm not entirely convinced inserting a p0 into an s64 is valid, but it's - ; certainly better than the alternative of directly forwarding the value - ; which would cause a nasty type mismatch. - ; CHECK-LABEL: name: test_inserts_3 - ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD - ; CHECK: [[NEWLO:%[0-9]+]]:_(s64) = G_PTRTOINT %0(p0) - ; CHECK: G_STORE [[NEWLO]](s64) - ; CHECK: G_STORE [[HI]] - %0:_(p0) = COPY $x0 - %1:_(s32) = COPY $w1 - %2:_(p0) = COPY $x2 - %3:_(s128) = G_LOAD %2(p0) :: (load 16) - %4:_(s128) = G_INSERT %3(s128), %0(p0), 0 - G_STORE %4(s128), %2(p0) :: (store 16) - RET_ReallyLR -... - ---- -name: test_inserts_4 -body: | - bb.0: - liveins: $w0 - - ; A narrow insert gets surrounded by a G_ANYEXT/G_TRUNC pair. - ; CHECK-LABEL: name: test_inserts_4 - ; CHECK: [[VALEXT:%[0-9]+]]:_(s32) = COPY %2(s32) - ; CHECK: [[VAL:%[0-9]+]]:_(s32) = G_INSERT [[VALEXT]], %1(s1), 0 - ; CHECK: %5:_(s8) = G_TRUNC [[VAL]](s32) - %4:_(s32) = COPY $w0 - %0:_(s1) = G_TRUNC %4 - %5:_(s32) = COPY $w1 - %1:_(s8) = G_TRUNC %5 - %2:_(p0) = COPY $x2 - %3:_(s8) = G_INSERT %1(s8), %0(s1), 0 - G_STORE %3(s8), %2(p0) :: (store 1) - RET_ReallyLR -... - ---- -name: test_inserts_5 -body: | - bb.0: - liveins: $x0, $x1, $x2 - - - ; CHECK-LABEL: name: test_inserts_5 - ; CHECK: [[INS_LO:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 0 - ; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, [[INS_LO]](s32), 32 - ; CHECK: [[INS_HI:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 32 - ; CHECK: [[VAL_HI:%[0-9]+]]:_(s64) = G_INSERT %1, [[INS_HI]](s32), 0 - ; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), [[VAL_HI]](s64) - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s64) = COPY $x2 - %3:_(s128) = G_MERGE_VALUES %0, %1 - %4:_(s128) = G_INSERT %3, %2, 32 - %5:_(s64) = G_TRUNC %4 - $x0 = COPY %5 - RET_ReallyLR -... - ---- -name: test_inserts_6 -body: | - bb.0: - liveins: $x0, $x1, $x2 - - - ; CHECK-LABEL: name: test_inserts_6 - ; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, %2(s32), 32 - ; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), %1(s64) - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s32) = COPY $w2 - %3:_(s128) = G_MERGE_VALUES %0, %1 - %4:_(s128) = G_INSERT %3, %2, 32 - %5:_(s64) = G_TRUNC %4 - $x0 = COPY %5 - RET_ReallyLR -... - ---- name: test_inserts_nonpow2 body: | bb.0: Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store-s128-unaligned.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store-s128-unaligned.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store-s128-unaligned.mir @@ -1,30 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -march=aarch64 -o - -run-pass=legalizer %s | FileCheck %s ---- -name: loadstore128_align4 -exposesReturnsTwice: false -legalized: false -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: loadstore128_align4 - ; CHECK: liveins: $x0, $x1 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 4) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8, align 4) - ; CHECK: G_STORE [[LOAD]](s64), [[COPY1]](p0) :: (store 8, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[COPY1]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s64), [[GEP1]](p0) :: (store 8, align 4) - ; CHECK: RET_ReallyLR - %0:_(p0) = COPY $x0 - %1:_(p0) = COPY $x1 - %2:_(s128) = G_LOAD %0(p0) :: (load 16, align 4) - G_STORE %2(s128), %1(p0) :: (store 16, align 4) - RET_ReallyLR - -... Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -80,12 +80,8 @@ ; CHECK: [[LOAD5:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load 8) ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[LOAD5]](<2 x s32>) ; CHECK: $x0 = COPY [[BITCAST]](s64) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8, align 16) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C]](s64) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 8) - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD6]](s64), [[LOAD7]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[MV]](s128) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load 16) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD6]](s128) ; CHECK: $x0 = COPY [[TRUNC]](s64) %0:_(p0) = COPY $x0 %1:_(s1) = G_LOAD %0(p0) :: (load 1) @@ -135,10 +131,8 @@ ; CHECK: G_STORE [[PTRTOINT]](s64), [[COPY]](p0) :: (store 8) ; CHECK: G_STORE [[COPY]](p0), [[COPY]](p0) :: (store 8) ; CHECK: [[PTRTOINT1:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY]](p0) - ; CHECK: G_STORE [[PTRTOINT1]](s64), [[COPY]](p0) :: (store 8, align 16) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[PTRTOINT1]](s64), [[GEP]](p0) :: (store 8) + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[PTRTOINT1]](s64), [[PTRTOINT1]](s64) + ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s1) = G_TRUNC %1(s32) Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir @@ -0,0 +1,23 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s +--- +name: extract_s64_s128 +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: extract_s64_s128 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s128) = COPY $q0 + ; CHECK: [[EXTRACT:%[0-9]+]]:fpr(s64) = G_EXTRACT [[COPY]](s128), 0 + ; CHECK: $d2 = COPY [[EXTRACT]](s64) + ; CHECK: RET_ReallyLR implicit $d2 + %0:_(s128) = COPY $q0 + %1:_(s64) = G_EXTRACT %0(s128), 0 + $d2 = COPY %1(s64) + RET_ReallyLR implicit $d2 + +... Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-trunc-s128.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-trunc-s128.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-trunc-s128.mir @@ -0,0 +1,23 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s +--- +name: trunc_s64_s128 +alignment: 2 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: trunc_s64_s128 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s128) = COPY $q0 + ; CHECK: [[TRUNC:%[0-9]+]]:fpr(s64) = G_TRUNC [[COPY]](s128) + ; CHECK: $d2 = COPY [[TRUNC]](s64) + ; CHECK: RET_ReallyLR implicit $d2 + %0:_(s128) = COPY $q0 + %1:_(s64) = G_TRUNC %0(s128) + $d2 = COPY %1(s64) + RET_ReallyLR implicit $d2 + +... Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-extract.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=instruction-select %s -o - | FileCheck %s +... +--- +name: extract_64_128 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: extract_64_128 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub + ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1 + ; CHECK: $d3 = COPY [[COPY1]] + ; CHECK: $d4 = COPY [[CPYi64_]] + ; CHECK: RET_ReallyLR implicit $d3 + %0:fpr(s128) = COPY $q0 + %2:fpr(s64) = G_EXTRACT %0(s128), 0 + %3:fpr(s64) = G_EXTRACT %0(s128), 64 + $d3 = COPY %2(s64) + $d4 = COPY %3(s64) + RET_ReallyLR implicit $d3 + +... Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-trunc.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-trunc.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-trunc.mir @@ -7,6 +7,8 @@ define void @trunc_s32_s64() { ret void } define void @trunc_s8_s64() { ret void } define void @trunc_s1_s32() { ret void } + define void @trunc_s64_s128() { ret void } + define void @trunc_s32_s128() { ret void } ... --- @@ -70,10 +72,52 @@ ; CHECK-LABEL: name: trunc_s1_s32 ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 - ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY]] - ; CHECK: $w0 = COPY [[COPY2]] + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]] + ; CHECK: $w0 = COPY [[COPY1]] %0(s32) = COPY $w0 %1(s1) = G_TRUNC %0 %2:gpr(s32) = G_ANYEXT %1 $w0 = COPY %2(s32) ... + +--- +name: trunc_s64_s128 +legalized: true +regBankSelected: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: trunc_s64_s128 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub + ; CHECK: $x0 = COPY [[COPY1]] + %0(s128) = COPY $q0 + %1(s64) = G_TRUNC %0 + $x0 = COPY %1(s64) +... + +--- +name: trunc_s32_s128 +legalized: true +regBankSelected: true +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + +body: | + bb.0: + liveins: $q0 + + ; CHECK-LABEL: name: trunc_s32_s128 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub + ; CHECK: $w0 = COPY [[COPY1]] + %0(s128) = COPY $q0 + %1(s32) = G_TRUNC %0 + $w0 = COPY %1(s32) +...