diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -451,6 +451,10 @@ /// Delete \p MI and replace all of its uses with \p Replacement. void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement); + /// Replace the constant shift amount of the funnel shift \p MI with + /// ShiftAmt % BW, where BW is the scalar bit width of the destination type. + void applyFunnelShiftConstantModulo(MachineInstr &MI); + /// Return true if \p MOP1 and \p MOP2 are register operands are defined by /// equivalent instructions. bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2); @@ -463,6 +467,10 @@ /// equal to \p C. bool matchConstantFPOp(const MachineOperand &MOP, double C); + /// Return true if operand \p ConstIdx of \p MI is a known constant that is + /// greater than or equal to the bit width of the destination type of \p MI. + bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx); + /// Optimize (cond ? 
x : x) -> x bool matchSelectSameVal(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -855,6 +855,28 @@ (apply [{ Helper.applyFunnelShiftToRotate(*${root}); }]) >; +// Fold $x = fshr $y, $z, 0 -> $x = COPY $z +def funnel_shift_right_zero: GICombineRule< + (defs root:$root), + (match (G_FSHR $x, $y, $z, 0):$root), + (apply (COPY $x, $z)) +>; + +// Fold $x = fshl $y, $z, 0 -> $x = COPY $y +def funnel_shift_left_zero: GICombineRule< + (defs root:$root), + (match (G_FSHL $x, $y, $z, 0):$root), + (apply (COPY $x, $y)) +>; + +// Fold fsh(l/r) x, y, C -> fsh(l/r) x, y, C % bw, for a constant C >= bw +def funnel_shift_overshift: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_FSHL, G_FSHR):$root, + [{ return Helper.matchConstantLargerBitWidth(*${root}, 3); }]), + (apply [{ Helper.applyFunnelShiftConstantModulo(*${root}); }]) +>; + def rotate_out_of_range : GICombineRule< (defs root:$root), (match (wip_match_opcode G_ROTR, G_ROTL):$root, @@ -893,7 +915,10 @@ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift, - funnel_shift_to_rotate]>; + funnel_shift_to_rotate, + funnel_shift_right_zero, + funnel_shift_left_zero, + funnel_shift_overshift]>; def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2625,6 +2625,45 @@ replaceRegWith(MRI, OldReg, Replacement); } +bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI, + unsigned ConstIdx) { + Register ConstReg = MI.getOperand(ConstIdx).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // Look up the constant feeding operand ConstIdx; bail out if there is none + auto VRegAndVal 
= getIConstantVRegValWithLookThrough(ConstReg, MRI); + if (!VRegAndVal) + return false; + + // Return true if the constant is >= the scalar bit width of the destination + return VRegAndVal->Value.uge(DstTy.getScalarSizeInBits()); +} + +void CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) { + assert((MI.getOpcode() == TargetOpcode::G_FSHL || + MI.getOpcode() == TargetOpcode::G_FSHR) && + "This is not a funnel shift operation"); + + Register ConstReg = MI.getOperand(3).getReg(); + LLT ConstTy = MRI.getType(ConstReg); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI); + assert((VRegAndVal) && "Value is not a constant"); + + // New shift amount = old shift amount % scalar bit width of the destination + APInt NewConst = VRegAndVal->Value.urem( + APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits())); + + Builder.setInstrAndDebugLoc(MI); + auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue()); + Builder.buildInstr( + MI.getOpcode(), {MI.getOperand(0)}, + {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)}); + + MI.eraseFromParent(); +} + bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::G_SELECT); // Match (cond ? 
x : x) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshl.mir @@ -0,0 +1,452 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s + +--- +name: fshl_i8 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: fshl_i8 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s8) = G_FSHL [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %3:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %3(s32) + %4:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %4(s32) + %5:_(s32) = COPY $w2 + %2:_(s8) = G_TRUNC %5(s32) + %6:_(s8) = G_FSHL %0, %1, %2(s8) + %7:_(s32) = G_ANYEXT %6(s8) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshl_i16 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: fshl_i16 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s16) = G_FSHL [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %3:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $w2 + %2:_(s16) = G_TRUNC %5(s32) + %6:_(s16) = G_FSHL %0, %1, %2(s16) + %7:_(s32) = G_ANYEXT %6(s16) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshl_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: fshl_i32 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[COPY2]](s32) + ; CHECK-NEXT: $w0 = COPY [[FSHL]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = COPY $w2 + %3:_(s32) = G_FSHL %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshl_i64 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: fshl_i64 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s64) = G_FSHL [[COPY]], [[COPY1]], [[COPY2]](s64) + ; CHECK-NEXT: $x0 = COPY [[FSHL]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = G_FSHL %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... + +--- +name: fshl_i8_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i8_const_shift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s8) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_CONSTANT i8 5 + %5:_(s8) = G_FSHL %0, %1, %4(s8) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshl_i8_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i8_const_overshift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s8) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_CONSTANT i8 10 + %5:_(s8) = G_FSHL %0, %1, %4(s8) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshl_i8_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i8_shift_by_bidwidth + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_CONSTANT i8 8 + %5:_(s8) = G_FSHL %0, %1, %4(s8) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshl_i16_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i16_const_shift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s16) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_CONSTANT i16 5 + %5:_(s16) = G_FSHL %0, %1, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshl_i16_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i16_const_overshift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s16) = G_FSHL [[TRUNC]], [[TRUNC1]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHL]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_CONSTANT i16 20 + %5:_(s16) = G_FSHL %0, %1, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + 
+... + +--- +name: fshl_i16_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i16_shift_by_bidwidth + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_CONSTANT i16 16 + %5:_(s16) = G_FSHL %0, %1, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshl_i32_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i32_const_shift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $w0 = COPY [[FSHL]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_CONSTANT i32 5 + %3:_(s32) = G_FSHL %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshl_i32_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i32_const_overshift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s32) = G_FSHL [[COPY]], [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $w0 = COPY [[FSHL]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_CONSTANT i32 42 + %3:_(s32) = G_FSHL %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshl_i32_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshl_i32_shift_by_bidwidth + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_CONSTANT i32 32 + %3:_(s32) = G_FSHL %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshl_i64_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: fshl_i64_const_shift + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s64) = G_FSHL [[COPY]], [[COPY1]], [[C]](s64) + ; CHECK-NEXT: $x0 = COPY [[FSHL]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 5 + %3:_(s64) = G_FSHL %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... 
+ +--- +name: fshl_i64_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: fshl_i64_const_overshift + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[FSHL:%[0-9]+]]:_(s64) = G_FSHL [[COPY]], [[COPY1]], [[C]](s64) + ; CHECK-NEXT: $x0 = COPY [[FSHL]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + %3:_(s64) = G_FSHL %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... + +--- +name: fshl_i64_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: fshl_i64_shift_by_bidwidth + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 64 + %3:_(s64) = G_FSHL %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fshr.mir @@ -0,0 +1,452 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=arm64-unknown-unknown -global-isel -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s + +--- +name: fshr_i8 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: fshr_i8 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %3:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %3(s32) + %4:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %4(s32) + %5:_(s32) = COPY $w2 + %2:_(s8) = G_TRUNC %5(s32) + %6:_(s8) = G_FSHR %0, %1, %2(s8) + %7:_(s32) = G_ANYEXT %6(s8) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshr_i16 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: fshr_i16 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[TRUNC2]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %3:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %3(s32) + %4:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $w2 + %2:_(s16) = G_TRUNC %5(s32) + %6:_(s16) = G_FSHR %0, %1, %2(s16) + %7:_(s32) = G_ANYEXT %6(s16) + $w0 = COPY %7(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshr_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1, $w2 + + ; CHECK-LABEL: name: fshr_i32 + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; CHECK-NEXT: $w0 = COPY [[FSHR]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = COPY $w2 + %3:_(s32) = G_FSHR %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshr_i64 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: fshr_i64 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s64) + ; CHECK-NEXT: $x0 = COPY [[FSHR]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = G_FSHR %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... + +--- +name: fshr_i8_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i8_const_shift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 5 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_CONSTANT i8 5 + %5:_(s8) = G_FSHR %0, %1, %4(s8) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshr_i8_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i8_const_overshift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 2 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s8) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_CONSTANT i8 10 + %5:_(s8) = G_FSHR %0, %1, %4(s8) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshr_i8_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i8_shift_by_bidwidth + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s8) = G_CONSTANT i8 8 + %5:_(s8) = G_FSHR %0, %1, %4(s8) + %6:_(s32) = G_ANYEXT %5(s8) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshr_i16_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i16_const_shift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 5 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_CONSTANT i16 5 + %5:_(s16) = G_FSHR %0, %1, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshr_i16_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i16_const_overshift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s16) = G_FSHR [[TRUNC]], [[TRUNC1]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSHR]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_CONSTANT i16 20 + %5:_(s16) = G_FSHR %0, %1, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + 
+... + +--- +name: fshr_i16_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i16_shift_by_bidwidth + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %2:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $w1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s16) = G_CONSTANT i16 16 + %5:_(s16) = G_FSHR %0, %1, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshr_i32_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i32_const_shift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $w0 = COPY [[FSHR]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_CONSTANT i32 5 + %3:_(s32) = G_FSHR %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... 
+ +--- +name: fshr_i32_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i32_const_overshift + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $w0 = COPY [[FSHR]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_CONSTANT i32 42 + %3:_(s32) = G_FSHR %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshr_i32_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: fshr_i32_shift_by_bidwidth + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:_(s32) = COPY $w0 + %1:_(s32) = COPY $w1 + %2:_(s32) = G_CONSTANT i32 32 + %3:_(s32) = G_FSHR %0, %1, %2(s32) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... + +--- +name: fshr_i64_const_shift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: fshr_i64_const_shift + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[C]](s64) + ; CHECK-NEXT: $x0 = COPY [[FSHR]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 5 + %3:_(s64) = G_FSHR %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... 
+ +--- +name: fshr_i64_const_overshift +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: fshr_i64_const_overshift + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[FSHR:%[0-9]+]]:_(s64) = G_FSHR [[COPY]], [[COPY1]], [[C]](s64) + ; CHECK-NEXT: $x0 = COPY [[FSHR]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + %3:_(s64) = G_FSHR %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... + +--- +name: fshr_i64_shift_by_bidwidth +alignment: 4 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: fshr_i64_shift_by_bidwidth + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 64 + %3:_(s64) = G_FSHR %0, %1, %2(s64) + $x0 = COPY %3(s64) + RET_ReallyLR implicit $x0 + +... 
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -394,15 +394,10 @@ } define i8 @fshr_i8_const_fold_overshift_3() { -; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_3: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov w0, #255 // =0xff -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_3: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov w0, #-1 // =0xffffffff -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, #255 // =0xff +; CHECK-NEXT: ret %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ret i8 %f } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -2790,33 +2790,33 @@ ; GFX6-LABEL: s_fshl_i32_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v0, s1 -; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, -5 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 27 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshl_i32_5: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s1 -; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, -5 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 27 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshl_i32_5: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, -5 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 27 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshl_i32_5: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, -5 +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 27 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshl_i32_5: ; GFX11: ; %bb.0: -; 
GFX11-NEXT: v_alignbit_b32 v0, s0, s1, -5 +; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 27 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog @@ -2828,33 +2828,33 @@ ; GFX6-LABEL: s_fshl_i32_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v0, s1 -; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, -8 +; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 24 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0 ; GFX6-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_fshl_i32_8: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s1 -; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, -8 +; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 24 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_fshl_i32_8: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s1 -; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, -8 +; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 24 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: s_fshl_i32_8: ; GFX10: ; %bb.0: -; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, -8 +; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 24 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: s_fshl_i32_8: ; GFX11: ; %bb.0: -; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, -8 +; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog @@ -2889,13 +2889,13 @@ ; GCN-LABEL: v_fshl_i32_5: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -5 +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 27 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i32_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -5 +; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 27 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshl.i32(i32 
%lhs, i32 %rhs, i32 5) ret i32 %result @@ -2905,13 +2905,13 @@ ; GCN-LABEL: v_fshl_i32_8: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_alignbit_b32 v0, v0, v1, -8 +; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 24 ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_fshl_i32_8: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, -8 +; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 24 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8) ret i32 %result