Index: llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -758,6 +758,14 @@
   /// to a min/max instruction of some sort.
   bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo);
 
+  /// Fold a funnel shift whose shift amount is undef by treating the amount as
+  /// zero (it is interpreted modulo the bit width, so any value is valid):
+  ///   fshl (x, y, undef) -> x
+  ///   fshr (x, y, undef) -> y
+  /// Applies to both scalars and vectors. The result must not be folded to
+  /// undef: it is always some combination of the bits of x and y.
+  void applyUndefFunnelShiftAmount(MachineInstr &MI);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
Index: llvm/include/llvm/Target/GlobalISel/Combine.td
===================================================================
--- llvm/include/llvm/Target/GlobalISel/Combine.td
+++ llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -711,6 +711,15 @@
   (apply [{ Helper.applyFunnelShiftToRotate(*${root}); }])
 >;
 
+// Fold a funnel shift with an undef shift amount to the source operand that a
+// zero shift amount would select (fshl -> first source, fshr -> second).
+def undef_funnel_shift_amt: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_FSHL, G_FSHR):$root,
+    [{ return Helper.matchOperandIsUndef(*${root}, 3); }]),
+  (apply [{ Helper.applyUndefFunnelShiftAmount(*${root}); }])
+>;
+
 def rotate_out_of_range : GICombineRule<
   (defs root:$root),
   (match (wip_match_opcode G_ROTR, G_ROTL):$root,
@@ -960,7 +969,8 @@
                                      propagate_undef_shuffle_mask,
                                      erase_undef_store,
                                      unmerge_undef,
-                                     insert_extract_vec_elt_out_of_bounds]>;
+                                     insert_extract_vec_elt_out_of_bounds,
+                                     undef_funnel_shift_amt]>;
 
 def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         binop_same_val, binop_left_to_zero,
Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -5989,6 +5989,19 @@
   return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
 }
 
+void CombinerHelper::applyUndefFunnelShiftAmount(MachineInstr &MI) {
+  // The shift amount of a funnel shift is interpreted modulo the bit width, so
+  // an undef amount may be refined to any in-range value. Pick zero, which
+  // makes G_FSHL return its first source (operand 1) and G_FSHR its second
+  // (operand 2). This holds lane-wise for vectors as well; folding to undef
+  // would be wrong since the result is always built from bits of the sources.
+  assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
+          MI.getOpcode() == TargetOpcode::G_FSHR) &&
+         "Expected a funnel shift");
+  replaceSingleDefInstWithOperand(
+      MI, MI.getOpcode() == TargetOpcode::G_FSHL ? 1 : 2);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
Index: llvm/test/CodeGen/AArch64/GlobalISel/combine-undef-funnel-shift-amt.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/combine-undef-funnel-shift-amt.mir
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+...
+---
+name:            scalar_fshl
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; fshl (op1, op2, undef) -> op1 for scalars
+    ; CHECK-LABEL: name: scalar_fshl
+    ; CHECK: %op1:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: $w0 = COPY %op1(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %op1:_(s32) = G_CONSTANT i32 1
+    %op2:_(s32) = G_CONSTANT i32 10000
+    %undef:_(s32) = G_IMPLICIT_DEF
+    %shift:_(s32) = G_FSHL %op1, %op2, %undef(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            scalar_fshr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; fshr (op1, op2, undef) -> op2 for scalars
+    ; CHECK-LABEL: name: scalar_fshr
+    ; CHECK: %op2:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: $w0 = COPY %op2(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %op1:_(s32) = G_CONSTANT i32 1
+    %op2:_(s32) = G_CONSTANT i32 2
+    %undef:_(s32) = G_IMPLICIT_DEF
+    %shift:_(s32) = G_FSHR %op1, %op2, %undef(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            vector_fshl
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+    ; fshl (op1, op2, undef) -> op1 for vectors
+    ; CHECK-LABEL: name: vector_fshl
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %op1:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: $q0 = COPY %op1(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %op1:_(<4 x s32>) = COPY $q0
+    %op2:_(<4 x s32>) = COPY $q0
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %shift:_(<4 x s32>) = G_FSHL %op1, %op2, %undef(<4 x s32>)
+    $q0 = COPY %shift(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            vector_fshr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $q0
+    ; fshr (op1, op2, undef) -> op2 for vectors
+    ; CHECK-LABEL: name: vector_fshr
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %op2:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: $q0 = COPY %op2(<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %op1:_(<4 x s32>) = COPY $q0
+    %op2:_(<4 x s32>) = COPY $q0
+    %undef:_(<4 x s32>) = G_IMPLICIT_DEF
+    %shift:_(<4 x s32>) = G_FSHR %op1, %op2, %undef(<4 x s32>)
+    $q0 = COPY %shift(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...