Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -76,6 +76,10 @@ PIC_ADD, // Add with a PC operand and a PIC label. + ASRL, // MVE long arithmetic shift right. + LSRL, // MVE long shift right. + LSLL, // MVE long shift left. + CMP, // ARM compare instructions. CMN, // ARM CMN instructions. CMPZ, // ARM compare that sets only Z flag. Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -833,6 +833,11 @@ setOperationAction(ISD::SRA, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + // MVE lowers 64 bit shifts to lsll and lsrl + // assuming that ISD::SRL and SRA of i64 are already marked custom + if (Subtarget->hasMVEIntegerOps()) + setOperationAction(ISD::SHL, MVT::i64, Custom); + // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1. 
if (Subtarget->isThumb1Only()) { setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -1312,6 +1317,10 @@ case ARMISD::SSAT: return "ARMISD::SSAT"; case ARMISD::USAT: return "ARMISD::USAT"; + case ARMISD::ASRL: return "ARMISD::ASRL"; + case ARMISD::LSRL: return "ARMISD::LSRL"; + case ARMISD::LSLL: return "ARMISD::LSLL"; + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -5519,11 +5528,54 @@ if (VT != MVT::i64) return SDValue(); - assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SHL) && "Unknown shift to lower!"); + unsigned ShOpc = N->getOpcode(); + if (ST->hasMVEIntegerOps()) { + SDValue ShAmt = N->getOperand(1); + unsigned ShPartsOpc = ARMISD::LSLL; + ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt); + + // If the shift amount is greater than 32 then do the default optimisation + if (Con && Con->getZExtValue() > 32) + return SDValue(); + + // Extract the lower 32 bits of the shift amount if it's an i64 + if (ShAmt->getValueType(0) == MVT::i64) + ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt, + DAG.getConstant(0, dl, MVT::i32)); + + if (ShOpc == ISD::SRL) { + if (!Con) + // There is no t2LSRLr instruction so negate and perform an lsll if the + // shift amount is in a register, emulating a right shift. 
+ ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(0, dl, MVT::i32), ShAmt); + else + // Else generate an lsrl on the immediate shift amount + ShPartsOpc = ARMISD::LSRL; + } else if (ShOpc == ISD::SRA) + ShPartsOpc = ARMISD::ASRL; + + // Lower 32 bits of the destination/source + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(0, dl, MVT::i32)); + // Upper 32 bits of the destination/source + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(1, dl, MVT::i32)); + + // Generate the shift operation as computed above + Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi, + ShAmt); + // The upper 32 bits come from the second return value of lsll + Hi = SDValue(Lo.getNode(), 1); + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); + } + // We only lower SRA, SRL of 1 here, all others use generic lowering. - if (!isOneConstant(N->getOperand(1))) + if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL) return SDValue(); // If we are in thumb mode, we don't have RRX. 
@@ -8153,6 +8205,7 @@ break; case ISD::SRL: case ISD::SRA: + case ISD::SHL: Res = Expand64BitShift(N, DAG, Subtarget); break; case ISD::SREM: Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -99,6 +99,13 @@ SDTCisSameAs<0, 4>, SDTCisSameAs<0, 5>]>; +// ARMlsll, ARMlsrl, ARMasrl +def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisInt<4>]>; + def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; @@ -171,6 +178,10 @@ def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; +def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>; +def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>; +def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>; + def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -209,11 +209,21 @@ let DecoderMethod = "DecodeMVEOverlappingLongShift"; } -def t2ASRLr : t2MVEShiftDRegReg<"asrl", 0b1, 0b0>; -def t2ASRLi : t2MVEShiftDRegImm<"asrl", 0b10, ?>; -def t2LSLLr : t2MVEShiftDRegReg<"lsll", 0b0, 0b0>; -def t2LSLLi : t2MVEShiftDRegImm<"lsll", 0b00, ?>; -def t2LSRL : t2MVEShiftDRegImm<"lsrl", 0b01, ?>; +def t2ASRLr : t2MVEShiftDRegReg<"asrl", 0b1, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + (ARMasrl tGPREven:$RdaLo_src, + tGPROdd:$RdaHi_src, rGPR:$Rm))]>; +def t2ASRLi : t2MVEShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, 
tGPROdd:$RdaHi, + (ARMasrl tGPREven:$RdaLo_src, + tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; +def t2LSLLr : t2MVEShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + (ARMlsll tGPREven:$RdaLo_src, + tGPROdd:$RdaHi_src, rGPR:$Rm))]>; +def t2LSLLi : t2MVEShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + (ARMlsll tGPREven:$RdaLo_src, + tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; +def t2LSRL : t2MVEShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + (ARMlsrl tGPREven:$RdaLo_src, + tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; def t2SQRSHRL : t2MVEShiftDRegReg<"sqrshrl", 0b1, 0b1>; def t2SQSHLL : t2MVEShiftDRegImm<"sqshll", 0b11, 0b1>; Index: llvm/test/CodeGen/ARM/shift_parts.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/shift_parts.ll @@ -0,0 +1,193 @@ +; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1-m.main-none-eabi -mattr=+mve %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-MVE +; RUN: llc --verify-machineinstrs -mtriple=thumbv8.1-m.main-none-eabi %s -o - | FileCheck %s -check-prefix=CHECK --check-prefix=CHECK-NON-MVE + +define i64 @shift_left_reg(i64 %x, i64 %y) { +; CHECK-MVE-LABEL: shift_left_reg: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: lsll r0, r1, r2 +; +; CHECK-NON-MVE-LABEL: shift_left_reg: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: .save {r7, lr} +; CHECK-NON-MVE-NEXT: push {r7, lr} +; CHECK-NON-MVE-NEXT: bl __aeabi_llsl +entry: + %shl = shl i64 %x, %y + ret i64 %shl +} + +define i64 @shift_left_imm(i64 %x) { +; CHECK-MVE-LABEL: shift_left_imm: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: lsll r0, r1, #3 +; +; CHECK-NON-MVE-LABEL: shift_left_imm: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: lsrs r2, r0, #29 +; CHECK-NON-MVE-NEXT: lsls r1, r1, #3 +; CHECK-NON-MVE-NEXT: adds r1, r1, r2 +; CHECK-NON-MVE-NEXT: lsls r0, r0, #3 +entry: + %shl = shl i64 %x, 3 + ret i64 %shl +} + +define i64 
@shift_left_imm_big(i64 %x) { +; CHECK-LABEL: shift_left_imm_big: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsls r1, r0, #16 +; CHECK-NEXT: movs r0, #0 +entry: + %shl = shl i64 %x, 48 + ret i64 %shl +} + +define i64 @shift_left_imm_big2(i64 %x) { +; CHECK-MVE-LABEL: shift_left_imm_big2: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: lsll r0, r1, #32 +; +; CHECK-NON-MVE-LABEL: shift_left_imm_big2: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: movs r1, r0 +; CHECK-NON-MVE-NEXT: movs r0, #0 +entry: + %shl = shl i64 %x, 32 + ret i64 %shl +} + +define i64 @shift_left_imm_big3(i64 %x) { +; CHECK-LABEL: shift_left_imm_big3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsls r1, r0, #1 +; CHECK-NEXT: movs r0, #0 +entry: + %shl = shl i64 %x, 33 + ret i64 %shl +} + +define i64 @shift_right_reg(i64 %x, i64 %y) { +; CHECK-MVE-LABEL: shift_right_reg: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: rsbs r2, r2, #0 +; CHECK-MVE-NEXT: lsll r0, r1, r2 +; +; CHECK-NON-MVE-LABEL: shift_right_reg: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: .save {r7, lr} +; CHECK-NON-MVE-NEXT: push {r7, lr} +; CHECK-NON-MVE-NEXT: bl __aeabi_llsr +entry: + %shr = lshr i64 %x, %y + ret i64 %shr +} + +define i64 @shift_right_imm(i64 %x) { +; CHECK-MVE-LABEL: shift_right_imm: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: lsrl r0, r1, #3 +; +; CHECK-NON-MVE-LABEL: shift_right_imm: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: lsls r2, r1, #29 +; CHECK-NON-MVE-NEXT: lsrs r0, r0, #3 +; CHECK-NON-MVE-NEXT: adds r0, r0, r2 +; CHECK-NON-MVE-NEXT: lsrs r1, r1, #3 +entry: + %shr = lshr i64 %x, 3 + ret i64 %shr +} + +define i64 @shift_right_imm_big(i64 %x) { +; CHECK-LABEL: shift_right_imm_big: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsrs r0, r1, #16 +; CHECK-NEXT: movs r1, #0 +entry: + %shr = lshr i64 %x, 48 + ret i64 %shr +} + +define i64 @shift_right_imm_big2(i64 %x) { +; CHECK-MVE-LABEL: shift_right_imm_big2: +; CHECK-MVE: @ %bb.0: @ %entry +; 
CHECK-MVE-NEXT: lsrl r0, r1, #32 +; +; CHECK-NON-MVE-LABEL: shift_right_imm_big2: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: movs r0, r1 +; CHECK-NON-MVE-NEXT: movs r1, #0 +entry: + %shr = lshr i64 %x, 32 + ret i64 %shr +} + +define i64 @shift_right_imm_big3(i64 %x) { +; CHECK-LABEL: shift_right_imm_big3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: lsrs r0, r1, #1 +; CHECK-NEXT: movs r1, #0 +entry: + %shr = lshr i64 %x, 33 + ret i64 %shr +} + +define i64 @shift_arithmetic_right_reg(i64 %x, i64 %y) { +; CHECK-MVE-LABEL: shift_arithmetic_right_reg: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: asrl r0, r1, r2 +; +; CHECK-NON-MVE-LABEL: shift_arithmetic_right_reg: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: .save {r7, lr} +; CHECK-NON-MVE-NEXT: push {r7, lr} +; CHECK-NON-MVE-NEXT: bl __aeabi_lasr +entry: + %shr = ashr i64 %x, %y + ret i64 %shr +} + +define i64 @shift_arithmetic_right_imm(i64 %x) { +; CHECK-MVE-LABEL: shift_arithmetic_right_imm: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: asrl r0, r1, #3 +; +; CHECK-NON-MVE-LABEL: shift_arithmetic_right_imm: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: lsls r2, r1, #29 +; CHECK-NON-MVE-NEXT: lsrs r0, r0, #3 +; CHECK-NON-MVE-NEXT: adds r0, r0, r2 +; CHECK-NON-MVE-NEXT: asrs r1, r1, #3 +entry: + %shr = ashr i64 %x, 3 + ret i64 %shr +} + +%struct.bar = type { i16, i8, [5 x i8] } + +; Function Attrs: norecurse nounwind +define arm_aapcs_vfpcc void @fn1(%struct.bar* nocapture %a) { +; CHECK-MVE-LABEL: fn1: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: ldr r2, [r0, #4] +; CHECK-MVE-NEXT: movs r1, #0 +; CHECK-MVE-NEXT: lsll r2, r1, #8 +; CHECK-MVE-NEXT: strb r1, [r0, #7] +; CHECK-MVE-NEXT: str.w r2, [r0, #3] +; +; CHECK-NON-MVE-LABEL: fn1: +; CHECK-NON-MVE: @ %bb.0: @ %entry +; CHECK-NON-MVE-NEXT: ldr r1, [r0, #4] +; CHECK-NON-MVE-NEXT: lsls r2, r1, #8 +; CHECK-NON-MVE-NEXT: movs r3, #3 +; CHECK-NON-MVE-NEXT: str r2, [r0, r3] +; CHECK-NON-MVE-NEXT: lsrs r1, r1, #24 
+; CHECK-NON-MVE-NEXT: strb r1, [r0, #7] +entry: + %carey = getelementptr inbounds %struct.bar, %struct.bar* %a, i32 0, i32 2 + %0 = bitcast [5 x i8]* %carey to i40* + %bf.load = load i40, i40* %0, align 1 + %bf.clear = and i40 %bf.load, -256 + store i40 %bf.clear, i40* %0, align 1 + ret void +}