Index: lib/Target/RISCV/RISCVISelLowering.h =================================================================== --- lib/Target/RISCV/RISCVISelLowering.h +++ lib/Target/RISCV/RISCVISelLowering.h @@ -31,7 +31,10 @@ SELECT_CC, BuildPairF64, SplitF64, - TAIL + TAIL, + SHLW, + SRAW, + SRLW }; } @@ -57,6 +60,8 @@ // Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -80,10 +80,10 @@ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (Subtarget.is64Bit()) { - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::ANY_EXTEND); + setOperationAction(ISD::SHL, MVT::i32, Custom); + setOperationAction(ISD::SRA, MVT::i32, Custom); + setOperationAction(ISD::SRL, MVT::i32, Custom); } if (!Subtarget.hasStdExtM()) { @@ -512,15 +512,56 @@ return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); } -// Return true if the given node is a shift with a non-constant shift amount. -static bool isVariableShift(SDValue Val) { - switch (Val.getOpcode()) { +// Returns the opcode of the target-specific SDNode that implements the 32-bit +// form of the given Opcode. +static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { + switch (Opcode) { default: - return false; + llvm_unreachable("Unexpected opcode"); + case ISD::SHL: + return RISCVISD::SHLW; + case ISD::SRA: + return RISCVISD::SRAW; + case ISD::SRL: + return RISCVISD::SRLW; + } +} + +// Converts the given 32-bit operation to a target-specific SelectionDAG node. 
+// Because i32 isn't a legal type for RV64, these operations would otherwise +// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W +// later on because the fact that the operation was originally of type i32 is +// lost. +static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); + // We need to replace the i32 node with another i32 node. Replacing with + // an i64 node doesn't trigger an assert, but does lead to problems in the + // case that the result was extended. This is because the DAG combiner + // assumes that ty->ty no-op extends don't exist due to the folding logic + // in DAG.getNode. + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); +} + +void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const { + SDLoc DL(N); + switch (N->getOpcode()) { + default: + llvm_unreachable("Don't know how to custom type legalize this operation!"); case ISD::SHL: case ISD::SRA: case ISD::SRL: - return Val.getOperand(1).getOpcode() != ISD::Constant; + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + if (N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOp(N, DAG)); + break; } } @@ -545,34 +586,14 @@ switch (N->getOpcode()) { default: break; - case ISD::SHL: - case ISD::SRL: - case ISD::SRA: { - assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only"); - if (!DCI.isBeforeLegalize()) - break; - SDValue RHS = N->getOperand(1); - if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant || - (RHS->getOpcode() == ISD::AssertZext && - 
cast(RHS->getOperand(1))->getVT().getSizeInBits() <= 5)) - break; - SDValue LHS = N->getOperand(0); - SDLoc DL(N); - SDValue NewRHS = - DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5))); - return DCI.CombineTo( - N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS)); - } case ISD::ANY_EXTEND: { - // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64, - // then instead sign-extend in order to increase the chance of being able - // to select the sllw/srlw/sraw/divw/divuw/remuw instructions. + // If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend + // in order to increase the chance of being able to select the + // divw/divuw/remuw instructions. SDValue Src = N->getOperand(0); if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32) break; - if (!isVariableShift(Src) && - !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src))) + if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src))) break; SDLoc DL(N); // Don't add the new node to the DAGCombiner worklist, in order to avoid @@ -1682,6 +1703,12 @@ return "RISCVISD::SplitF64"; case RISCVISD::TAIL: return "RISCVISD::TAIL"; + case RISCVISD::SHLW: + return "RISCVISD::SHLW"; + case RISCVISD::SRAW: + return "RISCVISD::SRAW"; + case RISCVISD::SRLW: + return "RISCVISD::SRLW"; } return nullptr; } Index: lib/Target/RISCV/RISCVInstrInfo.td =================================================================== --- lib/Target/RISCV/RISCVInstrInfo.td +++ lib/Target/RISCV/RISCVInstrInfo.td @@ -51,6 +51,9 @@ def riscv_tail : SDNode<"RISCVISD::TAIL", SDT_RISCVCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def riscv_shlw : SDNode<"RISCVISD::SHLW", SDTIntShiftOp>; +def riscv_sraw : SDNode<"RISCVISD::SRAW", SDTIntShiftOp>; +def riscv_srlw : SDNode<"RISCVISD::SRLW", SDTIntShiftOp>; //===----------------------------------------------------------------------===// // Operand and 
SDNode transformation definitions. @@ -668,21 +671,14 @@ def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ return cast(N->getOperand(1))->getVT() == MVT::i32; }]>; -def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{ - return cast(N->getOperand(1))->getVT().getSizeInBits() <= 5; -}]>; def zexti32 : PatFrags<(ops node:$src), [(and node:$src, 0xffffffff), (assertzexti32 node:$src)]>; -// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable -// with a shiftw operation. The mask mustn't modify the lower 5 bits or the -// upper 32 bits. +// Defines a legal mask for (and src, mask) to be combinable with a shiftw +// operation. The mask mustn't modify the lower 5 bits or the upper 32 bits. def shiftwamt_mask : ImmLeaf(Imm) >= 5 && isUInt<32>(Imm); }]>; -def shiftwamt : PatFrags<(ops node:$src), - [(assertzexti5 (and node:$src, shiftwamt_mask)), - (assertzexti5 node:$src)]>; /// Immediates @@ -942,28 +938,20 @@ def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt), (SRAIW GPR:$rs1, uimm5:$shamt)>; -// For variable-length shifts, we rely on assertzexti5 being inserted during -// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to -// guarantee that selecting a 32-bit variable shift is legal (as the variable -// shift is known to be <= 32). We must also be careful not to create -// semantically incorrect patterns. For instance, selecting SRLW for -// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), -// is not guaranteed to be safe, as we don't know whether the upper 32-bits of -// the result are used or not (in the case where rs2=0, this is a -// sign-extension operation). 
- -def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32), - (SLLW GPR:$rs1, GPR:$rs2)>; -def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))), - (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>; - -def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32), - (SRLW GPR:$rs1, GPR:$rs2)>; -def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))), - (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>; - -def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), - (SRAW GPR:$rs1, GPR:$rs2)>; +multiclass ShiftWPat { + def : Pat<(ShiftWOp GPR:$rs1, (and GPR:$rs2, shiftwamt_mask)), + (Inst GPR:$rs1, GPR:$rs2)>; + def : Pat<(ShiftWOp GPR:$rs1, GPR:$rs2), + (Inst GPR:$rs1, GPR:$rs2)>; + def : Pat<(sext_inreg (ShiftWOp GPR:$rs1, (and GPR:$rs2, shiftwamt_mask)), i32), + (Inst GPR:$rs1, GPR:$rs2)>; + def : Pat<(sext_inreg (ShiftWOp GPR:$rs1, GPR:$rs2), i32), + (Inst GPR:$rs1, GPR:$rs2)>; +} + +defm : ShiftWPat; +defm : ShiftWPat; +defm : ShiftWPat; /// Loads Index: test/CodeGen/RISCV/atomic-cmpxchg.ll =================================================================== --- test/CodeGen/RISCV/atomic-cmpxchg.ll +++ test/CodeGen/RISCV/atomic-cmpxchg.ll @@ -61,8 +61,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_monotonic_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -139,8 +139,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_acquire_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -217,8 +217,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_acquire_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; 
RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -295,8 +295,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_release_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -373,8 +373,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_release_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -451,8 +451,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_acq_rel_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -529,8 +529,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_acq_rel_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -607,8 +607,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_seq_cst_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -685,8 +685,8 @@ ; ; RV64IA-LABEL: cmpxchg_i8_seq_cst_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -763,8 +763,8 @@ ; ; 
RV64IA-LABEL: cmpxchg_i8_seq_cst_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: addi a4, zero, 255 ; RV64IA-NEXT: sllw a4, a4, a3 ; RV64IA-NEXT: andi a2, a2, 255 @@ -846,8 +846,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -926,8 +926,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1006,8 +1006,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1086,8 +1086,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1166,8 +1166,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1246,8 +1246,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; 
RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1326,8 +1326,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1406,8 +1406,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1486,8 +1486,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 @@ -1566,8 +1566,8 @@ ; RV64IA-NEXT: addiw a3, a3, -1 ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: and a2, a2, a3 -; RV64IA-NEXT: andi a4, a0, 3 -; RV64IA-NEXT: slli a4, a4, 3 +; RV64IA-NEXT: slli a4, a0, 3 +; RV64IA-NEXT: andi a4, a4, 24 ; RV64IA-NEXT: sllw a3, a3, a4 ; RV64IA-NEXT: sllw a2, a2, a4 ; RV64IA-NEXT: sllw a1, a1, a4 Index: test/CodeGen/RISCV/atomic-rmw.ll =================================================================== --- test/CodeGen/RISCV/atomic-rmw.ll +++ test/CodeGen/RISCV/atomic-rmw.ll @@ -52,8 +52,8 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; 
RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -118,8 +118,8 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -184,8 +184,8 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -250,8 +250,8 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -316,8 +316,8 @@ ; ; RV64IA-LABEL: atomicrmw_xchg_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -382,8 +382,8 @@ ; ; RV64IA-LABEL: atomicrmw_add_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -448,8 +448,8 @@ ; ; RV64IA-LABEL: atomicrmw_add_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -514,8 +514,8 @@ ; ; RV64IA-LABEL: atomicrmw_add_i8_release: ; 
RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -580,8 +580,8 @@ ; ; RV64IA-LABEL: atomicrmw_add_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -646,8 +646,8 @@ ; ; RV64IA-LABEL: atomicrmw_add_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -712,8 +712,8 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -778,8 +778,8 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -844,8 +844,8 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -910,8 +910,8 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, 
zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -976,8 +976,8 @@ ; ; RV64IA-LABEL: atomicrmw_sub_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -1037,11 +1037,11 @@ ; RV64IA-LABEL: atomicrmw_and_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sll a3, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 ; RV64IA-NEXT: or a1, a3, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -1091,11 +1091,11 @@ ; RV64IA-LABEL: atomicrmw_and_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sll a3, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 ; RV64IA-NEXT: or a1, a3, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -1145,11 +1145,11 @@ ; RV64IA-LABEL: atomicrmw_and_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sll a3, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 ; RV64IA-NEXT: or a1, a3, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -1199,11 +1199,11 @@ ; RV64IA-LABEL: atomicrmw_and_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; 
RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sll a3, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 ; RV64IA-NEXT: or a1, a3, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -1253,11 +1253,11 @@ ; RV64IA-LABEL: atomicrmw_and_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: addi a3, zero, 255 -; RV64IA-NEXT: sll a3, a3, a2 +; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: not a3, a3 ; RV64IA-NEXT: or a1, a3, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -1313,8 +1313,8 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -1381,8 +1381,8 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -1449,8 +1449,8 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -1517,8 +1517,8 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; 
RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -1585,8 +1585,8 @@ ; ; RV64IA-LABEL: atomicrmw_nand_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a3, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -1643,9 +1643,9 @@ ; RV64IA-LABEL: atomicrmw_or_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1689,9 +1689,9 @@ ; RV64IA-LABEL: atomicrmw_or_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1735,9 +1735,9 @@ ; RV64IA-LABEL: atomicrmw_or_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1781,9 +1781,9 @@ ; RV64IA-LABEL: atomicrmw_or_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1827,9 
+1827,9 @@ ; RV64IA-LABEL: atomicrmw_or_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1873,9 +1873,9 @@ ; RV64IA-LABEL: atomicrmw_xor_i8_monotonic: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1919,9 +1919,9 @@ ; RV64IA-LABEL: atomicrmw_xor_i8_acquire: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -1965,9 +1965,9 @@ ; RV64IA-LABEL: atomicrmw_xor_i8_release: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -2011,9 +2011,9 @@ ; RV64IA-LABEL: atomicrmw_xor_i8_acq_rel: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: 
srlw a0, a0, a2 @@ -2057,9 +2057,9 @@ ; RV64IA-LABEL: atomicrmw_xor_i8_seq_cst: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a1, a1, 255 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -3738,8 +3738,8 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -3880,8 +3880,8 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4022,8 +4022,8 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4170,8 +4170,8 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4312,8 +4312,8 @@ ; ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4448,8 +4448,8 @@ ; ; 
RV64IA-LABEL: atomicrmw_umin_i8_monotonic: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4590,8 +4590,8 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_acquire: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4732,8 +4732,8 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_release: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -4880,8 +4880,8 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -5022,8 +5022,8 @@ ; ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 ; RV64IA-NEXT: addi a3, zero, 255 ; RV64IA-NEXT: sllw a6, a3, a2 ; RV64IA-NEXT: andi a1, a1, 255 @@ -5096,8 +5096,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5164,8 +5164,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; 
RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5232,8 +5232,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5300,8 +5300,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5368,8 +5368,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5436,8 +5436,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5504,8 +5504,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5572,8 +5572,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; 
RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5640,8 +5640,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5708,8 +5708,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5776,8 +5776,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5844,8 +5844,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5912,8 +5912,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -5980,8 +5980,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; 
RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6048,8 +6048,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6110,10 +6110,10 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 -; RV64IA-NEXT: sll a1, a1, a3 -; RV64IA-NEXT: sll a2, a2, a3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: not a2, a2 ; RV64IA-NEXT: or a1, a2, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6166,10 +6166,10 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 -; RV64IA-NEXT: sll a1, a1, a3 -; RV64IA-NEXT: sll a2, a2, a3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: not a2, a2 ; RV64IA-NEXT: or a1, a2, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6222,10 +6222,10 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 -; RV64IA-NEXT: sll a1, a1, a3 -; RV64IA-NEXT: sll a2, a2, a3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: not a2, a2 ; RV64IA-NEXT: or a1, a2, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6278,10 +6278,10 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 -; RV64IA-NEXT: sll a1, a1, 
a3 -; RV64IA-NEXT: sll a2, a2, a3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: not a2, a2 ; RV64IA-NEXT: or a1, a2, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6334,10 +6334,10 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 -; RV64IA-NEXT: sll a1, a1, a3 -; RV64IA-NEXT: sll a2, a2, a3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 +; RV64IA-NEXT: sllw a1, a1, a3 +; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: not a2, a2 ; RV64IA-NEXT: or a1, a2, a1 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6397,8 +6397,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6467,8 +6467,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6537,8 +6537,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6607,8 +6607,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi 
a0, a0, -4 @@ -6677,8 +6677,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a2, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -6737,9 +6737,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -6787,9 +6787,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -6837,9 +6837,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -6887,9 +6887,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -6937,9 +6937,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: 
addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -6987,9 +6987,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -7037,9 +7037,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aq a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -7087,9 +7087,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.rl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -7137,9 +7137,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -7187,9 +7187,9 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: 
addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a2, a0, 3 -; RV64IA-NEXT: slli a2, a2, 3 -; RV64IA-NEXT: sll a1, a1, a2 +; RV64IA-NEXT: slli a2, a0, 3 +; RV64IA-NEXT: andi a2, a2, 24 +; RV64IA-NEXT: sllw a1, a1, a2 ; RV64IA-NEXT: andi a0, a0, -4 ; RV64IA-NEXT: amoxor.w.aqrl a0, a1, (a0) ; RV64IA-NEXT: srlw a0, a0, a2 @@ -8900,8 +8900,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9052,8 +9052,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9204,8 +9204,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9362,8 +9362,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9514,8 +9514,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9660,8 +9660,8 
@@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9812,8 +9812,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -9964,8 +9964,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -10122,8 +10122,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 @@ -10274,8 +10274,8 @@ ; RV64IA-NEXT: lui a2, 16 ; RV64IA-NEXT: addiw a2, a2, -1 ; RV64IA-NEXT: and a1, a1, a2 -; RV64IA-NEXT: andi a3, a0, 3 -; RV64IA-NEXT: slli a3, a3, 3 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: andi a3, a3, 24 ; RV64IA-NEXT: sllw a6, a2, a3 ; RV64IA-NEXT: sllw a1, a1, a3 ; RV64IA-NEXT: andi a0, a0, -4 Index: test/CodeGen/RISCV/pr40333.ll =================================================================== --- test/CodeGen/RISCV/pr40333.ll +++ test/CodeGen/RISCV/pr40333.ll @@ -7,17 +7,10 @@ ; loop would be created in DAGCombine, converting ANY_EXTEND to SIGN_EXTEND ; and back again. 
-; TODO: This test case is also an example of where it would be cheaper to -; select SRLW, but the current lowering strategy fails to do so. - define signext i8 @foo(i32 %a, i32 %b) nounwind { ; RV64I-LABEL: foo: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a1, 32 -; RV64I-NEXT: srli a1, a1, 32 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 -; RV64I-NEXT: srl a0, a0, a1 +; RV64I-NEXT: srlw a0, a0, a1 ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ret