diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -225,7 +225,7 @@
                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
-  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
+  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                      const RegisterBank &DstRB, LLT ScalarTy,
@@ -3905,31 +3905,31 @@
 }
 
 MachineInstr *
-AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
+AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
+  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
-  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
+  unsigned RegSize = MRI.getType(LHS.getReg()).getSizeInBits();
   bool Is32Bit = (RegSize == 32);
-  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
-                                       {AArch64::ANDSWrr, AArch64::ANDSWri}};
+  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
+                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
+                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+
+  // ANDS needs a logical immediate for its immediate form. Check if we can
+  // fold one in.
+  if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+    if (AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize)) {
+      auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {ZReg}, {LHS});
+      TstMI.addImm(
+          AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
+      constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
+      return &*TstMI;
+    }
+  }
 
-  // We might be able to fold in an immediate into the TST. We need to make sure
-  // it's a logical immediate though, since ANDS requires that.
-  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
-  bool IsImmForm = ValAndVReg.hasValue() &&
-                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
-  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
-  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
-
-  if (IsImmForm)
-    TstMI.addImm(
-        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
-  else
-    TstMI.addUse(RHS);
-
-  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
-  return &*TstMI;
+  if (auto Fns = selectLogicalShiftedRegister(RHS))
+    return emitInstr(OpcTable[1][Is32Bit], {ZReg}, {LHS}, MIRBuilder, Fns);
+  return emitInstr(OpcTable[2][Is32Bit], {ZReg}, {LHS, RHS}, MIRBuilder);
 }
 
 std::pair
@@ -4289,8 +4289,8 @@
     if (!ValAndVReg || ValAndVReg->Value != 0)
       return nullptr;
 
-    return emitTST(LHSDef->getOperand(1).getReg(),
-                   LHSDef->getOperand(2).getReg(), MIRBuilder);
+    return emitTST(LHSDef->getOperand(1),
+                   LHSDef->getOperand(2), MIRBuilder);
   }
 
   return nullptr;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
@@ -510,3 +510,70 @@
     %7:gpr(s32) = G_ICMP intpred(eq), %0, %1
     $w0 = COPY %7(s32)
     RET_ReallyLR implicit $x0
+
+...
+---
+name: tst_fold_shift_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; We should fold the G_SHL into the ANDS to get ANDSXrs.
+    ;
+    ; CHECK-LABEL: name: tst_fold_shift_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %copy:gpr64 = COPY $x1
+    ; CHECK: %zero:gpr64 = COPY $xzr
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK: %one:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; CHECK: $xzr = ANDSXrs %zero, %copy, 16, implicit-def $nzcv
+    ; CHECK: %select:gpr64 = CSELXr %one, %zero, 0, implicit $nzcv
+    ; CHECK: $x0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s64) = COPY $x1
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %one:gpr(s64) = G_CONSTANT i64 1
+    %cst:gpr(s64) = G_CONSTANT i64 16
+    %shift:gpr(s64) = G_SHL %copy(s64), %cst(s64)
+    %and:gpr(s64) = G_AND %zero, %shift
+    %cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    %select:gpr(s64) = G_SELECT %cmp_trunc(s1), %one, %zero
+    $x0 = COPY %select(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: tst_fold_shift_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; We should fold the G_SHL into the ANDS to get ANDSWrs.
+    ;
+    ; CHECK-LABEL: name: tst_fold_shift_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %copy:gpr32 = COPY $w1
+    ; CHECK: %zero:gpr32 = COPY $wzr
+    ; CHECK: %one:gpr32 = MOVi32imm 1
+    ; CHECK: $wzr = ANDSWrs %zero, %copy, 16, implicit-def $nzcv
+    ; CHECK: %select:gpr32 = CSELWr %one, %zero, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w1
+    %zero:gpr(s32) = G_CONSTANT i32 0
+    %one:gpr(s32) = G_CONSTANT i32 1
+    %cst:gpr(s32) = G_CONSTANT i32 16
+    %shift:gpr(s32) = G_SHL %copy(s32), %cst(s32)
+    %and:gpr(s32) = G_AND %zero, %shift
+    %cmp:gpr(s32) = G_ICMP intpred(eq), %and(s32), %zero
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+    %select:gpr(s32) = G_SELECT %cmp_trunc(s1), %one, %zero
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0