Index: llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrFormats.td @@ -944,6 +944,21 @@ let MIOperandInfo = (ops GPR32, arith_extend64); } +def arith_extended_reg32_i32 : arith_extended_reg32; +def gi_arith_extended_reg32_i32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def arith_extended_reg32_i64 : arith_extended_reg32; +def gi_arith_extended_reg32_i64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + +def arith_extended_reg32to64_i64 : arith_extended_reg32to64; +def gi_arith_extended_reg32to64_i64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // Floating-point immediate. def fpimm16 : Operand, FPImmLeaf, mnemonic, OpNode> { + arith_extended_reg32_i32, mnemonic, OpNode> { let Inst{31} = 0; } def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32to64_i64, mnemonic, OpNode> { let Inst{31} = 1; } } @@ -2289,11 +2304,11 @@ // Add/Subtract extended register let AddedComplexity = 1 in { def Wrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32_i32, mnemonic, OpNode> { let Inst{31} = 0; } def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + arith_extended_reg32_i64, mnemonic, OpNode> { let Inst{31} = 1; } } Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td @@ -1035,10 +1035,10 @@ def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; let AddedComplexity = 1 in { -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; +def : Pat<(sub GPR32sp:$R2, 
arith_extended_reg32_i32:$R3), + (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; +def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), + (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; } // Because of the immediate format for add/sub-imm instructions, the Index: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -227,6 +227,14 @@ return selectShiftedRegister(Root); } + /// Instructions that accept extend modifiers like UXTW expect the register + /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a + /// subregister copy if necessary. Return either ExtReg, or the result of the + /// new copy. + Register narrowExtendRegIfNeeded(Register ExtReg, + MachineIRBuilder &MIB) const; + ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; + void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const; void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I) const; void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I) const; @@ -246,6 +254,11 @@ /// Return true if \p MI is a load or store of \p NumBytes bytes. bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; + /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit + /// register zeroed out. In other words, the result of MI has been explicitly + /// zero extended. + bool isDef32(const MachineInstr &MI) const; + const AArch64TargetMachine &TM; const AArch64Subtarget &STI; const AArch64InstrInfo &TII; @@ -363,7 +376,7 @@ SubReg = AArch64::hsub; break; case 32: - if (RC == &AArch64::GPR32RegClass) + if (RC != &AArch64::FPR32RegClass) SubReg = AArch64::sub_32; else SubReg = AArch64::ssub; @@ -676,35 +689,35 @@ return false; } - // Is this a cross-bank copy? 
- if (DstRegBank.getID() != SrcRegBank.getID()) { - // If we're doing a cross-bank copy on different-sized registers, we need - // to do a bit more work. - unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); - unsigned DstSize = TRI.getRegSizeInBits(*DstRC); - - if (SrcSize > DstSize) { - // We're doing a cross-bank copy into a smaller register. We need a - // subregister copy. First, get a register class that's on the same bank - // as the destination, but the same size as the source. - const TargetRegisterClass *SubregRC = - getMinClassForRegBank(DstRegBank, SrcSize, true); - assert(SubregRC && "Didn't get a register class for subreg?"); - - // Get the appropriate subregister for the destination. - unsigned SubReg = 0; - if (!getSubRegForClass(DstRC, TRI, SubReg)) { - LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); - return false; - } + unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); + unsigned DstSize = TRI.getRegSizeInBits(*DstRC); - // Now, insert a subregister copy using the new register class. - selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); - return CheckCopy(); + // If the source register is wider than the destination, we need to do a + // bit more work, whether or not the copy crosses register banks. + if (SrcSize > DstSize) { + // We're copying into a smaller register, so we need a + // subregister copy. First, get a register class that's on the same bank + // as the destination, but the same size as the source. + const TargetRegisterClass *SubregRC = + getMinClassForRegBank(DstRegBank, SrcSize, true); + assert(SubregRC && "Didn't get a register class for subreg?"); + + // Get the appropriate subregister for the destination. + unsigned SubReg = 0; + if (!getSubRegForClass(DstRC, TRI, SubReg)) { + LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); + return false; } - else if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && - SrcSize == 16) { + // Now, insert a subregister copy using the new register class. 
+ selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); + return CheckCopy(); + } + + // Is this a cross-bank copy? + if (DstRegBank.getID() != SrcRegBank.getID()) { + if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && + SrcSize == 16) { // Special case for FPR16 to GPR32. // FIXME: This can probably be generalized like the above case. Register PromoteReg = @@ -4472,6 +4485,146 @@ [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; } +/// Get the correct ShiftExtendType for an extend instruction. +static AArch64_AM::ShiftExtendType +getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) { + unsigned Opc = MI.getOpcode(); + + // Handle explicit extend instructions first. + if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + assert(Size != 64 && "Extend from 64 bits?"); + switch (Size) { + case 8: + return AArch64_AM::SXTB; + case 16: + return AArch64_AM::SXTH; + case 32: + return AArch64_AM::SXTW; + default: + return AArch64_AM::InvalidShiftExtend; + } + } + + if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { + unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + assert(Size != 64 && "Extend from 64 bits?"); + switch (Size) { + case 8: + return AArch64_AM::UXTB; + case 16: + return AArch64_AM::UXTH; + case 32: + return AArch64_AM::UXTW; + default: + return AArch64_AM::InvalidShiftExtend; + } + } + + // Don't have an explicit extend. Try to handle a G_AND with a constant mask + // on the RHS. 
+ if (Opc != TargetOpcode::G_AND) + return AArch64_AM::InvalidShiftExtend; + + Optional MaybeAndMask = getImmedFromMO(MI.getOperand(2)); + if (!MaybeAndMask) + return AArch64_AM::InvalidShiftExtend; + uint64_t AndMask = *MaybeAndMask; + switch (AndMask) { + default: + return AArch64_AM::InvalidShiftExtend; + case 0xFF: + return AArch64_AM::UXTB; + case 0xFFFF: + return AArch64_AM::UXTH; + case 0xFFFFFFFF: + return AArch64_AM::UXTW; + } +} + +Register AArch64InstructionSelector::narrowExtendRegIfNeeded( + Register ExtReg, MachineIRBuilder &MIB) const { + MachineRegisterInfo &MRI = *MIB.getMRI(); + if (MRI.getType(ExtReg).getSizeInBits() == 32) + return ExtReg; + + // Insert a copy to move ExtReg to GPR32. + Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg}); + + // Select the copy into a subregister copy. + selectCopy(*Copy, TII, MRI, TRI, RBI); + return Copy.getReg(0); +} + +/// Select an "extended register" operand. This operand folds in an extend +/// followed by an optional left shift. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectArithExtendedRegister( + MachineOperand &Root) const { + if (!Root.isReg()) + return None; + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + uint64_t ShiftVal = 0; + Register ExtReg; + AArch64_AM::ShiftExtendType Ext; + MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); + if (!RootDef) + return None; + + if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) + return None; + + // Check if we can fold a shift and an extend. + if (RootDef->getOpcode() == TargetOpcode::G_SHL) { + // Look for a constant on the RHS of the shift. + MachineOperand &RHS = RootDef->getOperand(2); + Optional MaybeShiftVal = getImmedFromMO(RHS); + if (!MaybeShiftVal) + return None; + ShiftVal = *MaybeShiftVal; + if (ShiftVal > 4) + return None; + // Look for a valid extend instruction on the LHS of the shift. 
+ MachineOperand &LHS = RootDef->getOperand(1); + MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); + if (!ExtDef) + return None; + Ext = getExtendTypeForInst(*ExtDef, MRI); + if (Ext == AArch64_AM::InvalidShiftExtend) + return None; + ExtReg = ExtDef->getOperand(1).getReg(); + } else { + // Didn't get a shift. Try just folding an extend. + Ext = getExtendTypeForInst(*RootDef, MRI); + if (Ext == AArch64_AM::InvalidShiftExtend) + return None; + ExtReg = RootDef->getOperand(1).getReg(); + + // If we have a 32 bit instruction which zeroes out the high half of a + // register, we get an implicit zero extend for free. Check if we have one. + // FIXME: We actually emit the extend right now even though we don't have + // to. + if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { + MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); + if (ExtInst && isDef32(*ExtInst)) + return None; + } + } + + // We require a GPR32 here. Narrow the ExtReg if needed using a subregister + // copy. + MachineIRBuilder MIB(*RootDef); + ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB); + + return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, + [=](MachineInstrBuilder &MIB) { + MIB.addImm(getArithExtendImm(Ext, ShiftVal)); + }}}; +} + void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const { const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); @@ -4506,6 +4659,26 @@ return (*MI.memoperands_begin())->getSize() == NumBytes; } +bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) + return false; + + // Only return true if we know the operation will zero-out the high half of + // the 64-bit register. Truncates can be subregister copies, which don't + // zero out the high bits. 
Copies and other copy-like instructions can be + // fed by truncates, or could be lowered as subregister copies. + switch (MI.getOpcode()) { + default: + return true; + case TargetOpcode::COPY: + case TargetOpcode::G_BITCAST: + case TargetOpcode::G_TRUNC: + case TargetOpcode::G_PHI: + return false; + } +} + namespace llvm { InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &TM, Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir +++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-arith-extended-reg.mir @@ -0,0 +1,634 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: add_sext_s32_to_s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $x2 + ; CHECK-LABEL: name: add_sext_s32_to_s64 + ; CHECK: liveins: $w1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr64sp = COPY $x2 + ; CHECK: %res:gpr64sp = ADDXrx %add_lhs, [[COPY]], 48 + ; CHECK: $x3 = COPY %res + ; CHECK: RET_ReallyLR implicit $x3 + %1:gpr(s32) = COPY $w1 + %ext:gpr(s64) = G_SEXT %1(s32) + %add_lhs:gpr(s64) = COPY $x2 + %res:gpr(s64) = G_ADD %add_lhs, %ext + $x3 = COPY %res(s64) + RET_ReallyLR implicit $x3 +... 
+--- +name: add_and_s32_to_s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $x1, $x2 + ; CHECK-LABEL: name: add_and_s32_to_s64 + ; CHECK: liveins: $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK: %add_lhs:gpr64sp = COPY $x2 + ; CHECK: %res:gpr64sp = ADDXrx %add_lhs, [[COPY2]], 16 + ; CHECK: $x3 = COPY %res + ; CHECK: RET_ReallyLR implicit $x3 + %1:gpr(s64) = COPY $x1 + %mask:gpr(s64) = G_CONSTANT i64 4294967295 ; 0xffffffff + %ext:gpr(s64) = G_AND %1(s64), %mask + %add_lhs:gpr(s64) = COPY $x2 + %res:gpr(s64) = G_ADD %add_lhs, %ext + $x3 = COPY %res(s64) + RET_ReallyLR implicit $x3 +... +--- +name: add_sext_s16_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_sext_s16_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 40 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s16) + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: add_zext_s16_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_zext_s16_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 8 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ZEXT %1(s16) + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: add_anyext_s16_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_anyext_s16_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 8 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ANYEXT %1(s16) + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: add_and_s16_to_s32_uxtb +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_and_s16_to_s32_uxtb + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 0 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff + %ext:gpr(s32) = G_AND %1(s32), %mask + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: add_and_s16_to_s32_uxth +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_and_s16_to_s32_uxth + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 8 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 65535 ; 0xffff + %ext:gpr(s32) = G_AND %1(s32), %mask + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: add_sext_s8_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_sext_s8_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 32 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s8) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s8) + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: add_zext_s8_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_zext_s8_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 0 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s8) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ZEXT %1(s8) + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: add_anyext_s8_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_anyext_s8_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 0 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s8) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ANYEXT %1(s8) + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: add_sext_with_shl +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_sext_with_shl + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, %wide_1, 43 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s16) + %imm:gpr(s32) = G_CONSTANT i32 3 + %shl:gpr(s32) = G_SHL %ext, %imm + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %shl + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: add_and_with_shl +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: add_and_with_shl + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %add_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32sp = ADDWrx %add_lhs, [[COPY]], 3 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff + %ext:gpr(s32) = G_AND %1(s32), %mask + %imm:gpr(s32) = G_CONSTANT i32 3 + %shl:gpr(s32) = G_SHL %ext, %imm + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %shl + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: dont_fold_invalid_mask +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + ; Check that we only fold when we have a supported AND mask. + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: dont_fold_invalid_mask + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %mask:gpr32 = MOVi32imm 42 + ; CHECK: %ext:gpr32 = ANDWrr [[COPY]], %mask + ; CHECK: %add_lhs:gpr32 = COPY $w2 + ; CHECK: %res:gpr32 = ADDWrr %add_lhs, %ext + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 42 + %ext:gpr(s32) = G_AND %1(s32), %mask + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: dont_fold_invalid_shl +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: dont_fold_invalid_shl + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %ext:gpr32 = SBFMWri %wide_1, 0, 15 + ; CHECK: %add_lhs:gpr32 = COPY $w2 + ; CHECK: %res:gpr32 = ADDWrs %add_lhs, %ext, 5 + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s16) + %imm:gpr(s32) = G_CONSTANT i32 5 + %shl:gpr(s32) = G_SHL %ext, %imm + %add_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_ADD %add_lhs, %shl + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: sub_sext_s32_to_s64 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $x2 + ; CHECK-LABEL: name: sub_sext_s32_to_s64 + ; CHECK: liveins: $w1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr64sp = COPY $x2 + ; CHECK: %res:gpr64 = SUBSXrx %sub_lhs, [[COPY]], 48, implicit-def $nzcv + ; CHECK: $x3 = COPY %res + ; CHECK: RET_ReallyLR implicit $x3 + %1:gpr(s32) = COPY $w1 + %ext:gpr(s64) = G_SEXT %1(s32) + %sub_lhs:gpr(s64) = COPY $x2 + %res:gpr(s64) = G_SUB %sub_lhs, %ext + $x3 = COPY %res(s64) + RET_ReallyLR implicit $x3 +... 
+--- +name: sub_sext_s16_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_sext_s16_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 40, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s16) + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: sub_zext_s16_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_zext_s16_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 8, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ZEXT %1(s16) + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: sub_anyext_s16_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_anyext_s16_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 8, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ANYEXT %1(s16) + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: sub_and_s16_to_s32_uxtb +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_and_s16_to_s32_uxtb + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 0, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff + %ext:gpr(s32) = G_AND %1(s32), %mask + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: sub_and_s16_to_s32_uxth +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_and_s16_to_s32_uxth + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 8, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 65535 ; 0xffff + %ext:gpr(s32) = G_AND %1(s32), %mask + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +--- +name: sub_sext_s8_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s8) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s8) + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... +--- +name: sub_zext_s8_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_zext_s8_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 0, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s8) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ZEXT %1(s8) + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: sub_anyext_s8_to_s32 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_anyext_s8_to_s32 + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 0, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s8) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_ANYEXT %1(s8) + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %ext + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +--- +... +--- +name: sub_sext_with_shl +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_sext_with_shl + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: %wide_1:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, %wide_1, 43, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %wide_1:gpr(s32) = COPY $w1 + %1:gpr(s16) = G_TRUNC %wide_1 + %ext:gpr(s32) = G_SEXT %1(s16) + %imm:gpr(s32) = G_CONSTANT i32 3 + %shl:gpr(s32) = G_SHL %ext, %imm + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %shl + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 +... 
+--- +name: sub_and_with_shl +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $w1, $w2, $x2 + ; CHECK-LABEL: name: sub_and_with_shl + ; CHECK: liveins: $w1, $w2, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK: %sub_lhs:gpr32sp = COPY $w2 + ; CHECK: %res:gpr32 = SUBSWrx %sub_lhs, [[COPY]], 3, implicit-def $nzcv + ; CHECK: $w3 = COPY %res + ; CHECK: RET_ReallyLR implicit $w3 + %1:gpr(s32) = COPY $w1 + %mask:gpr(s32) = G_CONSTANT i32 255 ; 0xff + %ext:gpr(s32) = G_AND %1(s32), %mask + %imm:gpr(s32) = G_CONSTANT i32 3 + %shl:gpr(s32) = G_SHL %ext, %imm + %sub_lhs:gpr(s32) = COPY $w2 + %res:gpr(s32) = G_SUB %sub_lhs, %shl + $w3 = COPY %res(s32) + RET_ReallyLR implicit $w3 Index: llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll +++ llvm/trunk/test/CodeGen/AArch64/addsub_ext.ll @@ -1,4 +1,9 @@ -; RUN: llc -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s +; RUN: llc -global-isel -enable-machine-outliner=never -verify-machineinstrs %s -o - -mtriple=aarch64-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 | FileCheck %s --check-prefix=GISEL + +; FIXME: GISel only knows how to handle explicit G_SEXT instructions. So when +; G_SEXT is lowered to anything else, it won't fold in a sxt*. +; FIXME: GISel doesn't currently handle folding the addressing mode into a cmp. 
@var8 = global i8 0 @var16 = global i16 0 @@ -7,6 +12,7 @@ define void @addsub_i8rhs() minsize { ; CHECK-LABEL: addsub_i8rhs: +; GISEL-LABEL: addsub_i8rhs: %val8_tmp = load i8, i8* @var8 %lhs32 = load i32, i32* @var32 %lhs64 = load i64, i64* @var64 @@ -20,23 +26,26 @@ %res32_zext = add i32 %lhs32, %rhs32_zext store volatile i32 %res32_zext, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb +; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb %rhs32_zext_shift = shl i32 %rhs32_zext, 3 %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift store volatile i32 %res32_zext_shift, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 - +; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 ; Zero-extending to 64-bits %rhs64_zext = zext i8 %val8 to i64 %res64_zext = add i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb %rhs64_zext_shift = shl i64 %rhs64_zext, 1 %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 ; Sign-extending to 32-bits %rhs32_sext = sext i8 %val8 to i32 @@ -95,23 +104,26 @@ %res32_zext = sub i32 %lhs32, %rhs32_zext store volatile i32 %res32_zext, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb +; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb %rhs32_zext_shift = shl i32 %rhs32_zext, 3 %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift store volatile i32 %res32_zext_shift, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 - +; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 ; Zero-extending to 64-bits %rhs64_zext = zext i8 %val8 to i64 %res64_zext = sub i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: sub 
{{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb +; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb %rhs64_zext_shift = shl i64 %rhs64_zext, 1 %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 +; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 ; Sign-extending to 32-bits %rhs32_sext = sext i8 %val8 to i32 @@ -140,6 +152,7 @@ define void @addsub_i16rhs() minsize { ; CHECK-LABEL: addsub_i16rhs: +; GISEL-LABEL: addsub_i16rhs: %val16_tmp = load i16, i16* @var16 %lhs32 = load i32, i32* @var32 %lhs64 = load i64, i64* @var64 @@ -153,23 +166,26 @@ %res32_zext = add i32 %lhs32, %rhs32_zext store volatile i32 %res32_zext, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth +; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth %rhs32_zext_shift = shl i32 %rhs32_zext, 3 %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift store volatile i32 %res32_zext_shift, i32* @var32 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 - +; GISEL: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 ; Zero-extending to 64-bits %rhs64_zext = zext i16 %val16 to i64 %res64_zext = add i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth %rhs64_zext_shift = shl i64 %rhs64_zext, 1 %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 ; Sign-extending to 32-bits %rhs32_sext = sext i16 %val16 to i32 @@ -215,6 +231,7 @@ define void @sub_i16rhs() minsize { ; CHECK-LABEL: sub_i16rhs: +; GISEL-LABEL: sub_i16rhs: %val16_tmp = load i16, i16* @var16 %lhs32 = load i32, i32* @var32 %lhs64 = load i64, i64* @var64 @@ -228,23 +245,26 @@ %res32_zext = sub i32 
%lhs32, %rhs32_zext store volatile i32 %res32_zext, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth +; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth %rhs32_zext_shift = shl i32 %rhs32_zext, 3 %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift store volatile i32 %res32_zext_shift, i32* @var32 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 - +; GISEL: subs {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 ; Zero-extending to 64-bits %rhs64_zext = zext i16 %val16 to i64 %res64_zext = sub i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth +; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth %rhs64_zext_shift = shl i64 %rhs64_zext, 1 %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 +; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 ; Sign-extending to 32-bits %rhs32_sext = sext i16 %val16 to i32 @@ -276,6 +296,7 @@ ; in the face of "add/sub (shifted register)" so I don't intend to. 
define void @addsub_i32rhs(i32 %in32) minsize { ; CHECK-LABEL: addsub_i32rhs: +; GISEL-LABEL: addsub_i32rhs: %val32_tmp = load i32, i32* @var32 %lhs64 = load i64, i64* @var64 @@ -285,22 +306,26 @@ %res64_zext = add i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw %rhs64_zext2 = zext i32 %val32 to i64 %rhs64_zext_shift = shl i64 %rhs64_zext2, 2 %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 %rhs64_sext = sext i32 %val32 to i64 %res64_sext = add i64 %lhs64, %rhs64_sext store volatile i64 %res64_sext, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw %rhs64_sext_shift = shl i64 %rhs64_sext, 2 %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift store volatile i64 %res64_sext_shift, i64* @var64 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2 +; GISEL: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2 ret void } @@ -316,12 +341,14 @@ %res64_zext = sub i64 %lhs64, %rhs64_zext store volatile i64 %res64_zext, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw +; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw %rhs64_zext2 = zext i32 %val32 to i64 %rhs64_zext_shift = shl i64 %rhs64_zext2, 2 %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift store volatile i64 %res64_zext_shift, i64* @var64 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 +; GISEL: subs {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 %rhs64_sext = sext i32 %val32 to i64 %res64_sext = sub i64 %lhs64, %rhs64_sext @@ -339,11 +366,15 @@ ; Check that implicit zext from w reg write is used instead of uxtw form of add. 
define i64 @add_fold_uxtw(i32 %x, i64 %y) { ; CHECK-LABEL: add_fold_uxtw: +; GISEL-LABEL: add_fold_uxtw: entry: ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3 +; GISEL: and w[[TMP:[0-9]+]], w0, #0x3 +; FIXME: Global ISel produces an unnecessary ubfx here. %m = and i32 %x, 3 %ext = zext i32 %m to i64 ; CHECK-NEXT: add x0, x1, x[[TMP]] +; GISEL: add x0, x1, x[[TMP]] %ret = add i64 %y, %ext ret i64 %ret } @@ -352,11 +383,14 @@ ; form of sub and that mov WZR is folded to form a neg instruction. define i64 @sub_fold_uxtw_xzr(i32 %x) { ; CHECK-LABEL: sub_fold_uxtw_xzr: +; GISEL-LABEL: sub_fold_uxtw_xzr: entry: ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3 +; GISEL: and w[[TMP:[0-9]+]], w0, #0x3 %m = and i32 %x, 3 %ext = zext i32 %m to i64 ; CHECK-NEXT: neg x0, x[[TMP]] +; GISEL: negs x0, x[[TMP]] %ret = sub i64 0, %ext ret i64 %ret } @@ -378,10 +412,13 @@ ; form of add, leading to madd selection. define i64 @madd_fold_uxtw(i32 %x, i64 %y) { ; CHECK-LABEL: madd_fold_uxtw: +; GISEL-LABEL: madd_fold_uxtw: entry: ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3 +; GISEL: and w[[TMP:[0-9]+]], w0, #0x3 %m = and i32 %x, 3 %ext = zext i32 %m to i64 +; GISEL: madd x0, x1, x1, x[[TMP]] ; CHECK-NEXT: madd x0, x1, x1, x[[TMP]] %mul = mul i64 %y, %y %ret = add i64 %mul, %ext @@ -408,11 +445,14 @@ ; form of add and add of -1 gets selected as sub. define i64 @add_imm_fold_uxtw(i32 %x) { ; CHECK-LABEL: add_imm_fold_uxtw: +; GISEL-LABEL: add_imm_fold_uxtw: entry: ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3 +; GISEL: and w[[TMP:[0-9]+]], w0, #0x3 %m = and i32 %x, 3 %ext = zext i32 %m to i64 ; CHECK-NEXT: sub x0, x[[TMP]], #1 +; GISEL: subs x0, x[[TMP]], #1 %ret = add i64 %ext, -1 ret i64 %ret } @@ -421,12 +461,15 @@ ; form of add and add lsl form gets selected.
define i64 @add_lsl_fold_uxtw(i32 %x, i64 %y) { ; CHECK-LABEL: add_lsl_fold_uxtw: +; GISEL-LABEL: add_lsl_fold_uxtw: entry: ; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3 +; GISEL: orr w[[TMP:[0-9]+]], w0, #0x3 %m = or i32 %x, 3 %ext = zext i32 %m to i64 %shift = shl i64 %y, 3 ; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3 +; GISEL: add x0, x[[TMP]], x1, lsl #3 %ret = add i64 %ext, %shift ret i64 %ret }