Index: llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -998,7 +998,11 @@ Register NextReg; // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. - if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT) + // + // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number + // on the truncated x is the same as the bit number on x. + if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || + Opc == TargetOpcode::G_TRUNC) NextReg = MI->getOperand(1).getReg(); // Did we find something worth folding? Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-trunc-tbz-tbnz.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-trunc-tbz-tbnz.mir @@ -0,0 +1,35 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +... 
+---
+name: fold_trunc # Bit test on a G_TRUNC'd value; expected selection is a single TBNZW of bit 3 on the sub_32 copy of %copy.
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: fold_trunc
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK: liveins: $x0
+  ; CHECK: %copy:gpr64all = COPY $x0
+  ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %copy.sub_32
+  ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+  ; CHECK: TBNZW [[COPY1]], 3, %bb.1
+  ; CHECK: B %bb.0
+  ; CHECK: bb.1:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    successors: %bb.0, %bb.1
+    liveins: $x0
+    %copy:gpr(s64) = COPY $x0
+    %bit:gpr(s32) = G_CONSTANT i32 8 ; 8 == 1 << 3, so the G_AND below isolates bit 3.
+    %zero:gpr(s32) = G_CONSTANT i32 0
+    %fold_me:gpr(s32) = G_TRUNC %copy(s64) ; s64 -> s32 truncate sitting between the $x0 copy and the bit test.
+    %and:gpr(s32) = G_AND %fold_me, %bit
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %and(s32), %zero ; Nonzero exactly when bit 3 of %fold_me is set.
+    %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32) ; Narrow the compare result to the s1 condition used by G_BRCOND.
+    G_BRCOND %cmp_trunc(s1), %bb.1 ; Branch to %bb.1 when bit 3 is set; otherwise fall to the G_BR back to %bb.0.
+    G_BR %bb.0
+  bb.1:
+    RET_ReallyLR