diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -383,6 +383,14 @@ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) >; +// Fold fshr x, y, 0 -> y +def fourth_identity_zero: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_FSHR):$root, + [{ return Helper.matchConstantOp(${root}->getOperand(3), 0); }]), + (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 2); }]) +>; + // Fold x op 1 -> x def right_identity_one: GICombineRule< (defs root:$root), @@ -1081,6 +1089,7 @@ insert_extract_vec_elt_out_of_bounds]>; def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, + fourth_identity_zero, binop_same_val, binop_left_to_zero, binop_right_to_zero, p2i_to_i2p, i2p_to_p2i, anyext_trunc_fold, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -51,6 +51,10 @@ LegalizerHelper &Helper) const; bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, + GISelChangeObserver &Observer, + LegalizerHelper &Helper) const; bool legalizeCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -871,7 +871,9 @@ getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) .lowerIf([=](const LegalityQuery &Q) { 
return Q.Types[0].isScalar(); }); - getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); + getActionDefinitionsBuilder({G_FSHL, G_FSHR}) + .customFor({{s32, s32}, {s32, s64}, {s64, s64}}) + .lower(); getActionDefinitionsBuilder(G_ROTR) .legalFor({{s32, s64}, {s64, s64}}) @@ -984,6 +986,9 @@ case TargetOpcode::G_SBFX: case TargetOpcode::G_UBFX: return legalizeBitfieldExtract(MI, MRI, Helper); + case TargetOpcode::G_FSHL: + case TargetOpcode::G_FSHR: + return legalizeFunnelShift(MI, MRI, MIRBuilder, Observer, Helper); case TargetOpcode::G_ROTR: return legalizeRotate(MI, MRI, Helper); case TargetOpcode::G_CTPOP: @@ -1004,6 +1009,64 @@ llvm_unreachable("expected switch to return"); }
+bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
+                                               MachineRegisterInfo &MRI,
+                                               MachineIRBuilder &MIRBuilder,
+                                               GISelChangeObserver &Observer,
+                                               LegalizerHelper &Helper) const {
+  assert(MI.getOpcode() == TargetOpcode::G_FSHL ||
+         MI.getOpcode() == TargetOpcode::G_FSHR);
+  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
+
+  // Only a constant shift amount can be kept as a G_FSHR; anything else uses
+  // the generic shift-based lowering.
+  Register ShiftNo = MI.getOperand(3).getReg();
+  LLT ShiftTy = MRI.getType(ShiftNo);
+  auto VRegAndVal = getIConstantVRegValWithLookThrough(ShiftNo, MRI);
+  if (!VRegAndVal)
+    return Helper.lowerFunnelShiftAsShifts(MI) ==
+           LegalizerHelper::LegalizeResult::Legalized;
+
+  // Funnel shift amounts are interpreted modulo the width of the shifted
+  // value (operand 0), which may differ from the width of the shift amount
+  // type (e.g. an s32 funnel shift with an s64 amount).
+  const uint64_t BitWidth =
+      MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+  const uint64_t RawAmount = VRegAndVal->Value.getZExtValue();
+  const uint64_t Amount = RawAmount % BitWidth;
+
+  // A G_FSHL by a multiple of the bit width returns its first input, whereas
+  // a G_FSHR by zero returns its second, so this case cannot be rewritten as
+  // a G_FSHR. Fall back to the generic lowering for it.
+  if (IsFSHL && Amount == 0)
+    return Helper.lowerFunnelShiftAsShifts(MI) ==
+           LegalizerHelper::LegalizeResult::Legalized;
+
+  // A G_FSHR whose amount is already an in-range 64-bit constant is legal as
+  // is.
+  if (!IsFSHL && ShiftTy.getSizeInBits() == 64 && RawAmount < BitWidth)
+    return true;
+
+  // fshl(x, y, c) == fshr(x, y, BitWidth - c) for 0 < c < BitWidth.
+  const APInt NewAmount(64, IsFSHL ? BitWidth - Amount : Amount);
+  auto Cast64 = MIRBuilder.buildConstant(LLT::scalar(64), NewAmount);
+
+  if (!IsFSHL) {
+    // Keep the G_FSHR and only swap in the normalized 64-bit amount.
+    Observer.changingInstr(MI);
+    MI.getOperand(3).setReg(Cast64.getReg(0));
+    Observer.changedInstr(MI);
+    return true;
+  }
+
+  // Replace the G_FSHL with the equivalent G_FSHR.
+  MIRBuilder.buildInstr(TargetOpcode::G_FSHR, {MI.getOperand(0).getReg()},
+                        {MI.getOperand(1).getReg(), MI.getOperand(2).getReg(),
+                         Cast64.getReg(0)});
+  MI.eraseFromParent();
+  return true;
+}
+ bool AArch64LegalizerInfo::legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare i8 @llvm.fshl.i8(i8, i8, i8) declare i16 @llvm.fshl.i16(i16, i16, i16) @@ -17,49 +18,110 @@ ; General case - all operands can be variables. 
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: fshl_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr w9, w1, #1 -; CHECK-NEXT: lsl w10, w0, w2 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: orr w0, w10, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-SD-NEXT: mvn w8, w2 +; CHECK-SD-NEXT: lsr w9, w1, #1 +; CHECK-SD-NEXT: lsl w10, w0, w2 +; CHECK-SD-NEXT: lsr w8, w9, w8 +; CHECK-SD-NEXT: orr w0, w10, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsr w10, w1, #1 +; CHECK-GI-NEXT: lsl w9, w0, w9 +; CHECK-GI-NEXT: lsr w8, w10, w8 +; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: ret %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) ret i32 %f } define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { -; CHECK-LABEL: fshl_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr x9, x1, #1 -; CHECK-NEXT: lsl x10, x0, x2 -; CHECK-NEXT: lsr x8, x9, x8 -; CHECK-NEXT: orr x0, x10, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mvn w8, w2 +; CHECK-SD-NEXT: lsr x9, x1, #1 +; CHECK-SD-NEXT: lsl x10, x0, x2 +; CHECK-SD-NEXT: lsr x8, x9, x8 +; CHECK-SD-NEXT: orr x0, x10, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #63 // =0x3f +; CHECK-GI-NEXT: and x9, x2, #0x3f +; CHECK-GI-NEXT: bic x8, x8, x2 +; CHECK-GI-NEXT: lsr x10, x1, #1 +; CHECK-GI-NEXT: lsl x9, x0, x9 +; CHECK-GI-NEXT: lsr x8, x10, x8 +; CHECK-GI-NEXT: orr x0, x9, x8 +; CHECK-GI-NEXT: ret %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z) ret i64 %f } define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind { -; CHECK-LABEL: fshl_i128: -; CHECK: // %bb.0: -; CHECK-NEXT: tst x4, 
#0x40 -; CHECK-NEXT: mvn w8, w4 -; CHECK-NEXT: csel x9, x2, x3, ne -; CHECK-NEXT: csel x10, x3, x0, ne -; CHECK-NEXT: lsr x9, x9, #1 -; CHECK-NEXT: lsl x11, x10, x4 -; CHECK-NEXT: csel x12, x0, x1, ne -; CHECK-NEXT: lsr x10, x10, #1 -; CHECK-NEXT: lsr x9, x9, x8 -; CHECK-NEXT: lsl x12, x12, x4 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: orr x0, x11, x9 -; CHECK-NEXT: orr x1, x12, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i128: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: tst x4, #0x40 +; CHECK-SD-NEXT: mvn w8, w4 +; CHECK-SD-NEXT: csel x9, x2, x3, ne +; CHECK-SD-NEXT: csel x10, x3, x0, ne +; CHECK-SD-NEXT: lsr x9, x9, #1 +; CHECK-SD-NEXT: lsl x11, x10, x4 +; CHECK-SD-NEXT: csel x12, x0, x1, ne +; CHECK-SD-NEXT: lsr x10, x10, #1 +; CHECK-SD-NEXT: lsr x9, x9, x8 +; CHECK-SD-NEXT: lsl x12, x12, x4 +; CHECK-SD-NEXT: lsr x8, x10, x8 +; CHECK-SD-NEXT: orr x0, x11, x9 +; CHECK-SD-NEXT: orr x1, x12, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i128: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #64 // =0x40 +; CHECK-GI-NEXT: and x9, x4, #0x7f +; CHECK-GI-NEXT: sub x11, x8, x9 +; CHECK-GI-NEXT: sub x13, x9, #64 +; CHECK-GI-NEXT: mov w10, #127 // =0x7f +; CHECK-GI-NEXT: cmp x9, #64 +; CHECK-GI-NEXT: bic x10, x10, x4 +; CHECK-GI-NEXT: lsl x15, x1, x9 +; CHECK-GI-NEXT: lsr x11, x0, x11 +; CHECK-GI-NEXT: lsl x14, x0, x9 +; CHECK-GI-NEXT: orr x11, x11, x15 +; CHECK-GI-NEXT: lsl x9, x0, x13 +; CHECK-GI-NEXT: sub x8, x8, x10 +; CHECK-GI-NEXT: csel x9, x11, x9, lo +; CHECK-GI-NEXT: lsl x11, x3, #63 +; CHECK-GI-NEXT: sub x15, x10, #64 +; CHECK-GI-NEXT: csel x13, x14, xzr, lo +; CHECK-GI-NEXT: orr x11, x11, x2, lsr #1 +; CHECK-GI-NEXT: lsr x14, x3, #1 +; CHECK-GI-NEXT: tst x4, #0x7f +; CHECK-GI-NEXT: lsr x16, x11, x10 +; CHECK-GI-NEXT: lsl x8, x14, x8 +; CHECK-GI-NEXT: mvn x12, x4 +; CHECK-GI-NEXT: csel x9, x1, x9, eq +; CHECK-GI-NEXT: lsr x17, x14, x10 +; CHECK-GI-NEXT: orr x8, x16, x8 +; CHECK-GI-NEXT: lsr x14, x14, x15 +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: 
csel x8, x8, x14, lo +; CHECK-GI-NEXT: tst x12, #0x7f +; CHECK-GI-NEXT: csel x8, x11, x8, eq +; CHECK-GI-NEXT: cmp x10, #64 +; CHECK-GI-NEXT: csel x10, x17, xzr, lo +; CHECK-GI-NEXT: orr x0, x13, x8 +; CHECK-GI-NEXT: orr x1, x9, x10 +; CHECK-GI-NEXT: ret %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z) ret i128 %f } @@ -67,22 +129,38 @@ ; Verify that weird types are minimally supported. declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { -; CHECK-LABEL: fshl_i37: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #46053 -; CHECK-NEXT: and x8, x2, #0x1fffffffff -; CHECK-NEXT: movk x9, #12398, lsl #16 -; CHECK-NEXT: ubfiz x10, x1, #26, #37 -; CHECK-NEXT: movk x9, #15941, lsl #32 -; CHECK-NEXT: movk x9, #1771, lsl #48 -; CHECK-NEXT: umulh x8, x8, x9 -; CHECK-NEXT: mov w9, #37 -; CHECK-NEXT: msub w8, w8, w9, w2 -; CHECK-NEXT: mvn w9, w8 -; CHECK-NEXT: lsl x8, x0, x8 -; CHECK-NEXT: lsr x9, x10, x9 -; CHECK-NEXT: orr x0, x8, x9 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i37: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x9, #46053 // =0xb3e5 +; CHECK-SD-NEXT: and x8, x2, #0x1fffffffff +; CHECK-SD-NEXT: movk x9, #12398, lsl #16 +; CHECK-SD-NEXT: ubfiz x10, x1, #26, #37 +; CHECK-SD-NEXT: movk x9, #15941, lsl #32 +; CHECK-SD-NEXT: movk x9, #1771, lsl #48 +; CHECK-SD-NEXT: umulh x8, x8, x9 +; CHECK-SD-NEXT: mov w9, #37 // =0x25 +; CHECK-SD-NEXT: msub w8, w8, w9, w2 +; CHECK-SD-NEXT: mvn w9, w8 +; CHECK-SD-NEXT: lsl x8, x0, x8 +; CHECK-SD-NEXT: lsr x9, x10, x9 +; CHECK-SD-NEXT: orr x0, x8, x9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i37: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #37 // =0x25 +; CHECK-GI-NEXT: and x9, x2, #0x1fffffffff +; CHECK-GI-NEXT: udiv x10, x9, x8 +; CHECK-GI-NEXT: msub x8, x10, x8, x9 +; CHECK-GI-NEXT: mov w9, #36 // =0x24 +; CHECK-GI-NEXT: ubfx x10, x1, #1, #36 +; CHECK-GI-NEXT: sub x9, x9, x8 +; CHECK-GI-NEXT: and x8, x8, #0x1fffffffff +; CHECK-GI-NEXT: and x9, x9, #0x1fffffffff +; CHECK-GI-NEXT: lsl 
x8, x0, x8 +; CHECK-GI-NEXT: lsr x9, x10, x9 +; CHECK-GI-NEXT: orr x0, x8, x9 +; CHECK-GI-NEXT: ret %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } @@ -93,26 +171,37 @@ define i7 @fshl_i7_const_fold() { ; CHECK-LABEL: fshl_i7_const_fold: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #67 +; CHECK-NEXT: mov w0, #67 // =0x43 ; CHECK-NEXT: ret %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) ret i7 %f } define i8 @fshl_i8_const_fold_overshift_1() { -; CHECK-LABEL: fshl_i8_const_fold_overshift_1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #128 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #128 // =0x80 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, #-128 // =0xffffff80 +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) ret i8 %f } define i8 @fshl_i8_const_fold_overshift_2() { -; CHECK-LABEL: fshl_i8_const_fold_overshift_2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #120 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i8_const_fold_overshift_2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #120 // =0x78 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i8_const_fold_overshift_2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #15 // =0xf +; CHECK-GI-NEXT: lsl w0, w8, #3 +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) ret i8 %f } @@ -162,10 +251,15 @@ ; This should work without any node-specific logic. 
define i8 @fshl_i8_const_fold() { -; CHECK-LABEL: fshl_i8_const_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #128 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i8_const_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #128 // =0x80 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i8_const_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, #-128 // =0xffffff80 +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) ret i8 %f } @@ -175,28 +269,50 @@ ; General case - all operands can be variables. define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: fshr_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl w9, w0, #1 -; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: lsl w8, w9, w8 -; CHECK-NEXT: orr w0, w8, w10 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-SD-NEXT: mvn w8, w2 +; CHECK-SD-NEXT: lsl w9, w0, #1 +; CHECK-SD-NEXT: lsr w10, w1, w2 +; CHECK-SD-NEXT: lsl w8, w9, w8 +; CHECK-SD-NEXT: orr w0, w8, w10 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsl w10, w0, #1 +; CHECK-GI-NEXT: lsr w9, w1, w9 +; CHECK-GI-NEXT: lsl w8, w10, w8 +; CHECK-GI-NEXT: orr w0, w8, w9 +; CHECK-GI-NEXT: ret %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) ret i32 %f } define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) { -; CHECK-LABEL: fshr_i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl x9, x0, #1 -; CHECK-NEXT: lsr x10, x1, x2 -; CHECK-NEXT: lsl x8, x9, x8 -; CHECK-NEXT: orr x0, x8, x10 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mvn w8, w2 +; CHECK-SD-NEXT: lsl x9, x0, #1 +; CHECK-SD-NEXT: lsr x10, x1, x2 +; CHECK-SD-NEXT: lsl x8, x9, x8 +; CHECK-SD-NEXT: orr x0, x8, x10 +; 
CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #63 // =0x3f +; CHECK-GI-NEXT: and x9, x2, #0x3f +; CHECK-GI-NEXT: bic x8, x8, x2 +; CHECK-GI-NEXT: lsl x10, x0, #1 +; CHECK-GI-NEXT: lsr x9, x1, x9 +; CHECK-GI-NEXT: lsl x8, x10, x8 +; CHECK-GI-NEXT: orr x0, x8, x9 +; CHECK-GI-NEXT: ret %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z) ret i64 %f } @@ -204,24 +320,41 @@ ; Verify that weird types are minimally supported. declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { -; CHECK-LABEL: fshr_i37: -; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #46053 -; CHECK-NEXT: and x8, x2, #0x1fffffffff -; CHECK-NEXT: movk x9, #12398, lsl #16 -; CHECK-NEXT: lsl x10, x1, #27 -; CHECK-NEXT: movk x9, #15941, lsl #32 -; CHECK-NEXT: lsl x11, x0, #1 -; CHECK-NEXT: movk x9, #1771, lsl #48 -; CHECK-NEXT: umulh x8, x8, x9 -; CHECK-NEXT: mov w9, #37 -; CHECK-NEXT: msub w8, w8, w9, w2 -; CHECK-NEXT: add w8, w8, #27 -; CHECK-NEXT: mvn w9, w8 -; CHECK-NEXT: lsr x8, x10, x8 -; CHECK-NEXT: lsl x9, x11, x9 -; CHECK-NEXT: orr x0, x9, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i37: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x9, #46053 // =0xb3e5 +; CHECK-SD-NEXT: and x8, x2, #0x1fffffffff +; CHECK-SD-NEXT: movk x9, #12398, lsl #16 +; CHECK-SD-NEXT: lsl x10, x1, #27 +; CHECK-SD-NEXT: movk x9, #15941, lsl #32 +; CHECK-SD-NEXT: lsl x11, x0, #1 +; CHECK-SD-NEXT: movk x9, #1771, lsl #48 +; CHECK-SD-NEXT: umulh x8, x8, x9 +; CHECK-SD-NEXT: mov w9, #37 // =0x25 +; CHECK-SD-NEXT: msub w8, w8, w9, w2 +; CHECK-SD-NEXT: add w8, w8, #27 +; CHECK-SD-NEXT: mvn w9, w8 +; CHECK-SD-NEXT: lsr x8, x10, x8 +; CHECK-SD-NEXT: lsl x9, x11, x9 +; CHECK-SD-NEXT: orr x0, x9, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i37: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #37 // =0x25 +; CHECK-GI-NEXT: and x9, x2, #0x1fffffffff +; CHECK-GI-NEXT: and x11, x1, #0x1fffffffff +; CHECK-GI-NEXT: udiv x10, x9, x8 +; CHECK-GI-NEXT: 
msub x8, x10, x8, x9 +; CHECK-GI-NEXT: mov w9, #36 // =0x24 +; CHECK-GI-NEXT: lsl x10, x0, #1 +; CHECK-GI-NEXT: sub x9, x9, x8 +; CHECK-GI-NEXT: and x8, x8, #0x1fffffffff +; CHECK-GI-NEXT: and x9, x9, #0x1fffffffff +; CHECK-GI-NEXT: lsr x8, x11, x8 +; CHECK-GI-NEXT: lsl x9, x10, x9 +; CHECK-GI-NEXT: orr x0, x9, x8 +; CHECK-GI-NEXT: ret %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } @@ -232,35 +365,52 @@ define i7 @fshr_i7_const_fold() { ; CHECK-LABEL: fshr_i7_const_fold: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #31 +; CHECK-NEXT: mov w0, #31 // =0x1f ; CHECK-NEXT: ret %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) ret i7 %f } define i8 @fshr_i8_const_fold_overshift_1() { -; CHECK-LABEL: fshr_i8_const_fold_overshift_1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #254 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #254 // =0xfe +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, #-2 // =0xfffffffe +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) ret i8 %f } define i8 @fshr_i8_const_fold_overshift_2() { -; CHECK-LABEL: fshr_i8_const_fold_overshift_2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #225 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #225 // =0xe1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #15 // =0xf +; CHECK-GI-NEXT: lsl w9, w8, #5 +; CHECK-GI-NEXT: orr w0, w9, w8, lsr #3 +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ret i8 %f } define i8 @fshr_i8_const_fold_overshift_3() { -; CHECK-LABEL: fshr_i8_const_fold_overshift_3: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #255 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #255 // =0xff 
+; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, #-1 // =0xffffffff +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ret i8 %f } @@ -301,18 +451,28 @@ ; This should work without any node-specific logic. define i8 @fshr_i8_const_fold() { -; CHECK-LABEL: fshr_i8_const_fold: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #254 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshr_i8_const_fold: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w0, #254 // =0xfe +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshr_i8_const_fold: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, #-2 // =0xfffffffe +; CHECK-GI-NEXT: ret %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7) ret i8 %f } define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) { -; CHECK-LABEL: fshl_i32_shift_by_bitwidth: -; CHECK: // %bb.0: -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fshl_i32_shift_by_bitwidth: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fshl_i32_shift_by_bitwidth: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w0, w1 +; CHECK-GI-NEXT: ret %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32) ret i32 %f } @@ -344,17 +504,30 @@ } define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) { -; CHECK-LABEL: or_shl_fshl: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsr w10, w1, #1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: lsl w8, w0, w8 -; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w0, w8, w10 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: or_shl_fshl: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, w2 +; CHECK-SD-NEXT: mvn w9, w2 +; CHECK-SD-NEXT: lsr w10, w1, #1 +; CHECK-SD-NEXT: lsr w9, w10, w9 +; CHECK-SD-NEXT: lsl w8, w0, w8 +; CHECK-SD-NEXT: lsl w10, w1, w2 +; CHECK-SD-NEXT: orr w8, w8, w9 +; CHECK-SD-NEXT: orr w0, w8, w10 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: or_shl_fshl: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; 
CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsl w10, w1, w2 +; CHECK-GI-NEXT: lsr w11, w1, #1 +; CHECK-GI-NEXT: lsl w9, w0, w9 +; CHECK-GI-NEXT: lsr w8, w11, w8 +; CHECK-GI-NEXT: orr w9, w9, w10 +; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s) %or = or i32 %fun, %shy @@ -376,17 +549,30 @@ } define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) { -; CHECK-LABEL: or_shl_fshl_commute: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsr w10, w1, #1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: lsl w8, w0, w8 -; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w0, w10, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: or_shl_fshl_commute: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, w2 +; CHECK-SD-NEXT: mvn w9, w2 +; CHECK-SD-NEXT: lsr w10, w1, #1 +; CHECK-SD-NEXT: lsr w9, w10, w9 +; CHECK-SD-NEXT: lsl w8, w0, w8 +; CHECK-SD-NEXT: lsl w10, w1, w2 +; CHECK-SD-NEXT: orr w8, w8, w9 +; CHECK-SD-NEXT: orr w0, w10, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: or_shl_fshl_commute: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsl w10, w1, w2 +; CHECK-GI-NEXT: lsr w11, w1, #1 +; CHECK-GI-NEXT: lsl w9, w0, w9 +; CHECK-GI-NEXT: lsr w8, w11, w8 +; CHECK-GI-NEXT: orr w9, w10, w9 +; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s) %or = or i32 %shy, %fun @@ -408,17 +594,30 @@ } define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) { -; CHECK-LABEL: or_lshr_fshr: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsl w10, w1, #1 -; CHECK-NEXT: lsr w8, w0, w8 -; CHECK-NEXT: lsl w9, w10, w9 -; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: orr w0, w8, w10 -; 
CHECK-NEXT: ret +; CHECK-SD-LABEL: or_lshr_fshr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, w2 +; CHECK-SD-NEXT: mvn w9, w2 +; CHECK-SD-NEXT: lsl w10, w1, #1 +; CHECK-SD-NEXT: lsr w8, w0, w8 +; CHECK-SD-NEXT: lsl w9, w10, w9 +; CHECK-SD-NEXT: lsr w10, w1, w2 +; CHECK-SD-NEXT: orr w8, w9, w8 +; CHECK-SD-NEXT: orr w0, w8, w10 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: or_lshr_fshr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsr w10, w1, w2 +; CHECK-GI-NEXT: lsl w11, w1, #1 +; CHECK-GI-NEXT: lsr w9, w0, w9 +; CHECK-GI-NEXT: lsl w8, w11, w8 +; CHECK-GI-NEXT: orr w9, w9, w10 +; CHECK-GI-NEXT: orr w0, w8, w9 +; CHECK-GI-NEXT: ret %shy = lshr i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s) %or = or i32 %fun, %shy @@ -439,17 +638,30 @@ } define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) { -; CHECK-LABEL: or_lshr_fshr_commute: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsl w10, w1, #1 -; CHECK-NEXT: lsr w8, w0, w8 -; CHECK-NEXT: lsl w9, w10, w9 -; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: orr w0, w10, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: or_lshr_fshr_commute: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, w2 +; CHECK-SD-NEXT: mvn w9, w2 +; CHECK-SD-NEXT: lsl w10, w1, #1 +; CHECK-SD-NEXT: lsr w8, w0, w8 +; CHECK-SD-NEXT: lsl w9, w10, w9 +; CHECK-SD-NEXT: lsr w10, w1, w2 +; CHECK-SD-NEXT: orr w8, w9, w8 +; CHECK-SD-NEXT: orr w0, w10, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: or_lshr_fshr_commute: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsr w10, w1, w2 +; CHECK-GI-NEXT: lsl w11, w1, #1 +; CHECK-GI-NEXT: lsr w9, w0, w9 +; CHECK-GI-NEXT: lsl w8, w11, w8 +; CHECK-GI-NEXT: orr w9, w10, w9 +; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: ret %shy = lshr 
i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s) %or = or i32 %shy, %fun @@ -470,15 +682,28 @@ } define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) { -; CHECK-LABEL: or_shl_fshl_simplify: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsr w9, w0, #1 -; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: lsr w8, w9, w8 -; CHECK-NEXT: orr w0, w10, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: or_shl_fshl_simplify: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-SD-NEXT: mvn w8, w2 +; CHECK-SD-NEXT: lsr w9, w0, #1 +; CHECK-SD-NEXT: lsl w10, w1, w2 +; CHECK-SD-NEXT: lsr w8, w9, w8 +; CHECK-SD-NEXT: orr w0, w10, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: or_shl_fshl_simplify: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsl w10, w1, w2 +; CHECK-GI-NEXT: lsr w11, w0, #1 +; CHECK-GI-NEXT: lsl w9, w1, w9 +; CHECK-GI-NEXT: lsr w8, w11, w8 +; CHECK-GI-NEXT: orr w9, w9, w10 +; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) %or = or i32 %fun, %shy @@ -486,15 +711,28 @@ } define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) { -; CHECK-LABEL: or_lshr_fshr_simplify: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: mvn w8, w2 -; CHECK-NEXT: lsl w9, w0, #1 -; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: lsl w8, w9, w8 -; CHECK-NEXT: orr w0, w8, w10 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: or_lshr_fshr_simplify: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-SD-NEXT: mvn w8, w2 +; CHECK-SD-NEXT: lsl w9, w0, #1 +; CHECK-SD-NEXT: lsr w10, w1, w2 +; CHECK-SD-NEXT: lsl w8, w9, w8 +; CHECK-SD-NEXT: orr w0, w8, w10 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: or_lshr_fshr_simplify: +; CHECK-GI: // %bb.0: +; 
CHECK-GI-NEXT: mov w8, #31 // =0x1f +; CHECK-GI-NEXT: and w9, w2, #0x1f +; CHECK-GI-NEXT: bic w8, w8, w2 +; CHECK-GI-NEXT: lsr w10, w1, w2 +; CHECK-GI-NEXT: lsl w11, w0, #1 +; CHECK-GI-NEXT: lsr w9, w1, w9 +; CHECK-GI-NEXT: lsl w8, w11, w8 +; CHECK-GI-NEXT: orr w9, w10, w9 +; CHECK-GI-NEXT: orr w0, w9, w8 +; CHECK-GI-NEXT: ret %shy = lshr i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s) %or = or i32 %shy, %fun