diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8877,13 +8877,9 @@ if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); - // TODO: This is limited to early combining because it may reveal regressions - // otherwise. But since we just checked a target hook to see if this is - // desirable, that should have filtered out cases where this interferes - // with some other pattern matching. - if (!LegalTypes) - if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) - return R; + // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)). + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. This sort of diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -173,6 +173,9 @@ bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const override; + EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -839,6 +839,39 @@ return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32; } +bool AMDGPUTargetLowering::isDesirableToCommuteWithShift( + const SDNode* N, CombineLevel Level) const { + assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && + "Expected shift op"); + // Always commute pre-type legalization and right shifts. + // We're looking for shl(or(x,y),z) patterns. + if (Level < CombineLevel::AfterLegalizeTypes || + N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR) + return true; + + // If only user is a i32 right-shift, then don't destroy a BFE pattern. + if (N->getValueType(0) == MVT::i32 && N->use_size() == 1 && + (N->use_begin()->getOpcode() == ISD::SRA || + N->use_begin()->getOpcode() == ISD::SRL)) + return false; + + // Don't destroy or(shl(load_zext(),c), load_zext()) patterns. 
+  auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) { +    if (LHS.getOpcode() != ISD::SHL) +      return false; +    auto *RHSLd = dyn_cast<LoadSDNode>(RHS); +    auto *LHS0 = dyn_cast<LoadSDNode>(LHS.getOperand(0)); +    auto *LHS1 = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); +    return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD && +           LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() && +           RHSLd->getExtensionType() == ISD::ZEXTLOAD; +  }; +  SDValue LHS = N->getOperand(0).getOperand(0); +  SDValue RHS = N->getOperand(0).getOperand(1); +  return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS)); +} + //===---------------------------------------------------------------------===// // TargetLowering Callbacks //===---------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1448,24 +1448,23 @@ ; SI-NEXT: s_mov_b32 s7, s3 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 -; SI-NEXT: v_lshrrev_b32_e32 v6, 24, v4 -; SI-NEXT: v_and_b32_e32 v7, 0xff00, v4 ; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 ; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 ; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 ; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 -; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4 +; SI-NEXT: v_add_i32_e32 v7, vcc, 9, v4 +; SI-NEXT: v_and_b32_e32 v6, 0xff00, v4 ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_and_b32_e32 v0, 0xff, v4 -; SI-NEXT: v_add_i32_e32 v2, vcc, 9, v5 -; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v6 -; SI-NEXT: v_or_b32_e32 v0, v7, v0 -; SI-NEXT: v_and_b32_e32 v2, 0xff, v2 +; SI-NEXT: v_and_b32_e32 v0, 0xff, v7 +; SI-NEXT: v_add_i32_e32 v1, vcc, 9, v5 +; SI-NEXT: v_or_b32_e32 v0, v6, v0 +; SI-NEXT: v_and_b32_e32 v1, 0xff, v1 +; SI-NEXT: v_and_b32_e32 v4, 0xff000000, v4 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x900, v0 -; SI-NEXT: v_or_b32_e32 v1, v1, v2 -; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_or_b32_e32 v1, v4, v1 ; SI-NEXT: v_or_b32_e32 v0, v1, v0 ; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll --- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll +++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll @@ -151,26 +151,26 @@ ; GFX7-ALIGNED-LABEL: global_load_2xi16_align1: ; GFX7-ALIGNED: ; %bb.0: ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc -; GFX7-ALIGNED-NEXT: flat_load_ubyte v4, v[2:3] -; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 3, v0 +; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 2, v0 ; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc -; GFX7-ALIGNED-NEXT: flat_load_ubyte v5, v[0:1] +; GFX7-ALIGNED-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc +; GFX7-ALIGNED-NEXT: v_add_i32_e32 v6, vcc, 3, v0 +; GFX7-ALIGNED-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc +; GFX7-ALIGNED-NEXT: flat_load_ubyte v6, v[6:7] +; GFX7-ALIGNED-NEXT: flat_load_ubyte v4, v[4:5] ; GFX7-ALIGNED-NEXT: flat_load_ubyte v2, v[2:3] -; GFX7-ALIGNED-NEXT: v_add_i32_e32 v0, vcc, 2, v0 -; GFX7-ALIGNED-NEXT: v_addc_u32_e32 
v1, vcc, 0, v1, vcc ; GFX7-ALIGNED-NEXT: flat_load_ubyte v0, v[0:1] ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v4 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 24, v6 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-ALIGNED-NEXT: v_or_b32_e32 v1, v1, v5 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v4 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) -; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX7-ALIGNED-NEXT: v_or_b32_e32 v1, v3, v2 +; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-UNALIGNED-LABEL: global_load_2xi16_align1: diff --git a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll --- a/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll +++ b/llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.private.ll @@ -208,19 +208,19 @@ ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v1, vcc, 2, v0 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v2, vcc, 1, v0 ; GFX7-ALIGNED-NEXT: v_add_i32_e32 v3, vcc, 3, v0 -; GFX7-ALIGNED-NEXT: buffer_load_ubyte v2, v2, s[0:3], 0 offen ; GFX7-ALIGNED-NEXT: buffer_load_ubyte v3, v3, s[0:3], 0 offen -; GFX7-ALIGNED-NEXT: buffer_load_ubyte v0, v0, s[0:3], 0 offen +; GFX7-ALIGNED-NEXT: buffer_load_ubyte v2, v2, s[0:3], 0 offen ; GFX7-ALIGNED-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen +; GFX7-ALIGNED-NEXT: buffer_load_ubyte v0, v0, s[0:3], 0 offen ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(3) -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 8, v2 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 24, v3 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(2) -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v3 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v2, 8, v2 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(1) -; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX7-ALIGNED-NEXT: s_waitcnt vmcnt(0) +; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v1, v3, v1 -; GFX7-ALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX7-ALIGNED-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-ALIGNED-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -2818,50 +2818,50 @@ ; GFX7-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 ; GFX7-NEXT: s_addc_u32 s13, s13, 0 ; GFX7-NEXT: s_waitcnt vmcnt(2) -; GFX7-NEXT: v_bfe_i32 v7, v2, 0, 4 -; GFX7-NEXT: v_bfe_i32 v3, v2, 24, 4 +; GFX7-NEXT: v_bfe_i32 v8, v2, 0, 4 +; GFX7-NEXT: v_ashrrev_i32_e32 v3, 28, v2 ; GFX7-NEXT: s_waitcnt vmcnt(1) -; GFX7-NEXT: v_bfe_i32 v14, v0, 0, 4 -; GFX7-NEXT: v_bfe_i32 v4, v2, 20, 4 -; GFX7-NEXT: v_bfe_i32 v5, v2, 16, 4 -; GFX7-NEXT: v_bfe_i32 v6, v2, 8, 4 -; GFX7-NEXT: v_ashrrev_i32_e32 v8, 28, v2 +; GFX7-NEXT: v_bfe_i32 v15, v0, 0, 4 +; GFX7-NEXT: v_bfe_i32 v4, v2, 24, 4 +; GFX7-NEXT: v_bfe_i32 v5, v2, 20, 4 +; GFX7-NEXT: v_bfe_i32 v6, v2, 16, 4 +; GFX7-NEXT: v_bfe_i32 v7, v2, 8, 4 ; GFX7-NEXT: v_bfe_i32 v9, v2, 12, 4 ; GFX7-NEXT: v_bfe_i32 v2, v2, 4, 4 -; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX7-NEXT: v_bfe_i32 v10, v0, 24, 4 -; GFX7-NEXT: v_bfe_i32 v11, v0, 20, 4 -; GFX7-NEXT: v_bfe_i32 v12, v0, 16, 4 -; GFX7-NEXT: v_bfe_i32 v13, v0, 8, 4 -; GFX7-NEXT: v_ashrrev_i32_e32 
v15, 28, v0 +; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v8 +; GFX7-NEXT: v_ashrrev_i32_e32 v10, 28, v0 +; GFX7-NEXT: v_bfe_i32 v11, v0, 24, 4 +; GFX7-NEXT: v_bfe_i32 v12, v0, 20, 4 +; GFX7-NEXT: v_bfe_i32 v13, v0, 16, 4 +; GFX7-NEXT: v_bfe_i32 v14, v0, 8, 4 ; GFX7-NEXT: v_bfe_i32 v16, v0, 12, 4 ; GFX7-NEXT: v_bfe_i32 v0, v0, 4, 4 -; GFX7-NEXT: v_and_b32_e32 v14, 0xff, v14 +; GFX7-NEXT: v_and_b32_e32 v15, 0xff, v15 ; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1 -; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX7-NEXT: v_mad_u32_u24 v1, v8, v15, v1 +; GFX7-NEXT: v_and_b32_e32 v7, 0xff, v7 ; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX7-NEXT: v_and_b32_e32 v14, 0xff, v14 ; GFX7-NEXT: v_lshlrev_b32_e32 v16, 24, v16 ; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1 ; GFX7-NEXT: v_alignbit_b32 v9, 0, v9, 24 ; GFX7-NEXT: v_alignbit_b32 v16, 0, v16, 24 -; GFX7-NEXT: v_mad_u32_u24 v0, v6, v13, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v7, v14, v0 +; GFX7-NEXT: v_and_b32_e32 v6, 0xff, v6 +; GFX7-NEXT: v_and_b32_e32 v13, 0xff, v13 +; GFX7-NEXT: v_mad_u32_u24 v0, v9, v16, v0 ; GFX7-NEXT: v_and_b32_e32 v5, 0xff, v5 ; GFX7-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX7-NEXT: v_mad_u32_u24 v0, v9, v16, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v6, v13, v0 ; GFX7-NEXT: v_and_b32_e32 v4, 0xff, v4 ; GFX7-NEXT: v_and_b32_e32 v11, 0xff, v11 ; GFX7-NEXT: v_mad_u32_u24 v0, v5, v12, v0 ; GFX7-NEXT: v_and_b32_e32 v3, 0xff, v3 ; GFX7-NEXT: v_and_b32_e32 v10, 0xff, v10 ; GFX7-NEXT: v_mad_u32_u24 v0, v4, v11, v0 -; GFX7-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX7-NEXT: v_and_b32_e32 v15, 0xff, v15 ; GFX7-NEXT: v_mad_u32_u24 v0, v3, v10, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, v8, v15, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll --- a/llvm/test/CodeGen/AMDGPU/idot8u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll @@ -2444,40 +2444,36 @@ ; GFX7-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 ; GFX7-NEXT: s_addc_u32 s13, s13, 0 ; GFX7-NEXT: s_waitcnt vmcnt(2) -; GFX7-NEXT: v_and_b32_e32 v7, 15, v2 -; GFX7-NEXT: v_bfe_u32 v6, v2, 4, 4 +; GFX7-NEXT: v_and_b32_e32 v8, 15, v2 +; GFX7-NEXT: v_bfe_u32 v3, v2, 24, 4 ; GFX7-NEXT: s_waitcnt vmcnt(1) -; GFX7-NEXT: v_and_b32_e32 v14, 15, v0 -; GFX7-NEXT: v_bfe_u32 v8, v2, 12, 4 -; GFX7-NEXT: v_bfe_u32 v13, v0, 4, 4 -; GFX7-NEXT: v_bfe_u32 v15, v0, 12, 4 +; GFX7-NEXT: v_and_b32_e32 v15, 15, v0 +; GFX7-NEXT: v_bfe_u32 v4, v2, 20, 4 +; GFX7-NEXT: v_bfe_u32 v5, v2, 16, 4 +; GFX7-NEXT: v_bfe_u32 v6, v2, 8, 4 +; GFX7-NEXT: v_bfe_u32 v7, v2, 4, 4 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 28, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 12, v2 +; GFX7-NEXT: v_bfe_u32 v10, v0, 24, 4 +; GFX7-NEXT: v_bfe_u32 v11, v0, 20, 4 +; GFX7-NEXT: v_bfe_u32 v12, v0, 16, 4 +; GFX7-NEXT: v_bfe_u32 v13, v0, 8, 4 +; GFX7-NEXT: v_bfe_u32 v14, v0, 4, 4 +; GFX7-NEXT: v_lshrrev_b32_e32 v16, 28, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 12, v0 ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mad_u32_u24 v1, v8, v15, v1 +; GFX7-NEXT: v_and_b32_e32 v2, 0xf000000, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xf000000, v0 ; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1 -; GFX7-NEXT: v_bfe_u32 v5, v2, 8, 4 -; GFX7-NEXT: v_bfe_u32 v12, v0, 8, 4 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v15, 24, v15 +; GFX7-NEXT: v_alignbit_b32 v2, s10, v2, 24 +; GFX7-NEXT: v_alignbit_b32 v0, 0, v0, 24 ; GFX7-NEXT: 
v_mad_u32_u24 v1, v6, v13, v1 -; GFX7-NEXT: v_alignbit_b32 v8, 0, v8, 24 -; GFX7-NEXT: v_alignbit_b32 v14, 0, v15, 24 -; GFX7-NEXT: v_mad_u32_u24 v1, v5, v12, v1 -; GFX7-NEXT: v_bfe_u32 v4, v2, 16, 4 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 28, v2 -; GFX7-NEXT: v_bfe_u32 v11, v0, 16, 4 -; GFX7-NEXT: v_lshrrev_b32_e32 v16, 28, v0 -; GFX7-NEXT: v_mad_u32_u24 v1, v8, v14, v1 -; GFX7-NEXT: v_bfe_u32 v3, v2, 20, 4 -; GFX7-NEXT: v_bfe_u32 v10, v0, 20, 4 -; GFX7-NEXT: v_alignbit_b32 v2, v9, v2, 24 -; GFX7-NEXT: v_alignbit_b32 v0, v16, v0, 24 -; GFX7-NEXT: v_mad_u32_u24 v1, v4, v11, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GFX7-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX7-NEXT: v_mad_u32_u24 v1, v3, v10, v1 ; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1 -; GFX7-NEXT: v_mad_u32_u24 v0, v9, v7, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v5, v12, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v4, v11, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v3, v10, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v9, v16, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -1060,17 +1060,18 @@ ; GCN-NEXT: s_lshr_b32 s42, s7, 22 ; GCN-NEXT: s_lshr_b32 s43, s7, 23 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x77 -; GCN-NEXT: v_mov_b32_e32 v16, s43 +; GCN-NEXT: v_mov_b32_e32 v14, s43 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x76 -; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc ; GCN-NEXT: v_mov_b32_e32 v17, s42 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v14, 3, v14 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x75 -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 +; GCN-NEXT: v_or_b32_e32 v14, v14, v17 ; GCN-NEXT: v_mov_b32_e32 v17, s41 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x74 @@ -1081,10 +1082,9 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: v_and_b32_e32 v17, 3, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x73 -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 +; GCN-NEXT: v_or_b32_e32 v14, v17, v14 ; GCN-NEXT: v_mov_b32_e32 v17, s39 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x72 @@ -1092,10 +1092,11 @@ ; GCN-NEXT: v_mov_b32_e32 v18, s38 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x71 -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: v_mov_b32_e32 v18, s37 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x70 @@ -1106,13 +1107,12 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 ; 
GCN-NEXT: v_and_b32_e32 v17, 15, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7f -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 +; GCN-NEXT: v_or_b32_e32 v14, v17, v14 ; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s35 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7e @@ -1120,10 +1120,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7d -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s35 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7c @@ -1134,7 +1135,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x7b ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 @@ -1145,81 +1145,82 @@ ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x78 -; GCN-NEXT: v_mov_b32_e32 v14, s35 -; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x78 +; GCN-NEXT: v_mov_b32_e32 v12, s35 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 3, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v19, 2, v19 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x79 -; GCN-NEXT: v_or_b32_e32 v18, v19, v18 +; GCN-NEXT: v_or_b32_e32 v18, v18, v19 ; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s35 -; GCN-NEXT: v_cndmask_b32_e32 v14, 1, v14, vcc +; GCN-NEXT: v_cndmask_b32_e32 v12, 1, v12, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc -; GCN-NEXT: v_and_b32_e32 v14, 1, v14 +; GCN-NEXT: v_and_b32_e32 v12, 1, v12 ; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 -; GCN-NEXT: v_or_b32_e32 v14, v14, v19 -; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 -; GCN-NEXT: v_and_b32_e32 v14, 3, v14 -; GCN-NEXT: v_or_b32_e32 v14, v14, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 -; GCN-NEXT: v_and_b32_e32 v14, 15, v14 -; GCN-NEXT: v_or_b32_sdwa v14, v14, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v12, v12, v19 +; GCN-NEXT: v_and_b32_e32 v12, 3, v12 +; GCN-NEXT: v_or_b32_e32 v18, v12, v18 +; GCN-NEXT: v_mov_b32_e32 v12, 15 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 12, v17 +; GCN-NEXT: v_and_b32_sdwa v18, v18, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6f -; GCN-NEXT: v_or_b32_sdwa v14, v16, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GCN-NEXT: v_lshrrev_b16_e64 v16, 15, s7 +; GCN-NEXT: v_or_b32_sdwa v14, v14, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GCN-NEXT: v_lshrrev_b16_e64 v17, 15, s7 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6e -; GCN-NEXT: v_lshrrev_b16_e64 v17, 14, s7 -; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 -; GCN-NEXT: v_and_b32_e32 v17, 1, v17 -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6d -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshrrev_b16_e64 v17, 
13, s7 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6c -; GCN-NEXT: v_lshrrev_b16_e64 v18, 12, s7 -; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 -; GCN-NEXT: v_and_b32_e32 v18, 1, v18 -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 -; GCN-NEXT: v_and_b32_e32 v17, 3, v17 -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6b -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s7 -; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6a -; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s7 +; GCN-NEXT: v_lshrrev_b16_e64 v18, 14, s7 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x69 -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s7 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6d +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 +; GCN-NEXT: v_lshrrev_b16_e64 v18, 13, s7 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 -; GCN-NEXT: s_cmpk_lg_i32 s0, 0x68 -; GCN-NEXT: v_lshrrev_b16_e64 v19, 8, s7 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6c +; GCN-NEXT: v_lshrrev_b16_e64 v19, 12, s7 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6b ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 -; GCN-NEXT: v_and_b32_e32 v17, 15, v17 +; GCN-NEXT: v_lshrrev_b16_e64 v18, 11, s7 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x6a +; GCN-NEXT: v_lshrrev_b16_e64 v19, 10, s7 +; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc +; GCN-NEXT: v_and_b32_e32 v19, 1, v19 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 3, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v19, 2, v19 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x69 +; GCN-NEXT: v_or_b32_e32 v18, v18, v19 +; GCN-NEXT: v_lshrrev_b16_e64 v19, 9, s7 +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: s_cmpk_lg_i32 s0, 0x68 +; GCN-NEXT: v_lshrrev_b16_e64 v16, 8, s7 +; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc +; GCN-NEXT: s_cselect_b64 vcc, -1, 0 +; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc +; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 +; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_or_b32_e32 v16, v16, v19 +; GCN-NEXT: v_and_b32_e32 v16, 3, v16 +; GCN-NEXT: v_or_b32_e32 v16, v16, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 12, v17 +; GCN-NEXT: v_and_b32_sdwa v16, v16, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x67 -; GCN-NEXT: v_or_b32_sdwa v16, v17, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v16, v17, v16 ; GCN-NEXT: v_lshrrev_b16_e64 v17, 7, s7 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x66 @@ -1227,10 +1228,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; 
GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x65 -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s7 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x64 @@ -1241,7 +1243,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x63 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 @@ -1252,10 +1253,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 3, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v19, 2, v19 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x61 -; GCN-NEXT: v_or_b32_e32 v18, v19, v18 +; GCN-NEXT: v_or_b32_e32 v18, v18, v19 ; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s7 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x60 @@ -1266,7 +1268,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 ; GCN-NEXT: v_and_b32_e32 v15, 1, v15 ; GCN-NEXT: v_or_b32_e32 v15, v15, v19 -; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: v_and_b32_e32 v15, 3, v15 ; GCN-NEXT: v_or_b32_e32 v15, v15, v18 ; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 @@ -1281,10 +1282,11 @@ ; GCN-NEXT: v_mov_b32_e32 v17, s33 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 3, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x55 -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 +; GCN-NEXT: v_or_b32_e32 v16, v16, v17 ; GCN-NEXT: v_mov_b32_e32 v17, s31 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x54 @@ -1295,7 +1297,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: v_and_b32_e32 v17, 3, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x53 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 @@ -1306,10 +1307,11 @@ ; GCN-NEXT: v_mov_b32_e32 v18, s28 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x51 -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: v_mov_b32_e32 v18, s27 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x50 @@ -1320,7 +1322,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 ; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 @@ -1334,10 +1335,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 
s0, 0x5d -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s25 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5c @@ -1348,7 +1350,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x5b ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 @@ -1359,13 +1360,14 @@ ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc +; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x58 ; GCN-NEXT: v_mov_b32_e32 v3, s25 -; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 -; GCN-NEXT: v_and_b32_e32 v19, 1, v19 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 3, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v19, 2, v19 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x59 -; GCN-NEXT: v_or_b32_e32 v18, v19, v18 +; GCN-NEXT: v_or_b32_e32 v18, v18, v19 ; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s25 ; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 @@ -1373,12 +1375,11 @@ ; GCN-NEXT: v_and_b32_e32 v3, 1, v3 ; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v3, v3, v19 -; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: v_and_b32_e32 v3, 3, v3 ; GCN-NEXT: v_or_b32_e32 v3, v3, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 4, v17 -; GCN-NEXT: v_and_b32_e32 v3, 15, v3 -; GCN-NEXT: v_or_b32_sdwa v3, v3, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_lshlrev_b16_e32 v17, 12, v17 +; GCN-NEXT: v_and_b32_sdwa v3, v3, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v3, v17, v3 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4f ; GCN-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GCN-NEXT: v_lshrrev_b16_e64 v3, 15, s6 @@ -1388,10 +1389,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v3, 3, v3 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4d -; GCN-NEXT: v_or_b32_e32 v3, v17, v3 +; GCN-NEXT: v_or_b32_e32 v3, v3, v17 ; GCN-NEXT: v_lshrrev_b16_e64 v17, 13, s6 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4c @@ -1402,7 +1404,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 ; GCN-NEXT: v_and_b32_e32 v17, 3, v17 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x4b ; GCN-NEXT: v_or_b32_e32 v3, v17, v3 @@ -1413,10 +1414,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 3, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x49 -; GCN-NEXT: v_or_b32_e32 v17, v18, v17 +; GCN-NEXT: v_or_b32_e32 v17, v17, v18 ; GCN-NEXT: v_lshrrev_b16_e64 v18, 9, s6 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x48 @@ -1427,13 +1429,12 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; 
GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: v_or_b32_e32 v17, v18, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v3, 4, v3 -; GCN-NEXT: v_and_b32_e32 v17, 15, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v3, 12, v3 +; GCN-NEXT: v_and_b32_sdwa v17, v17, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x47 -; GCN-NEXT: v_or_b32_sdwa v17, v17, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v17, v3, v17 ; GCN-NEXT: v_lshrrev_b16_e64 v3, 7, s6 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x46 @@ -1441,10 +1442,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v3, 3, v3 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x45 -; GCN-NEXT: v_or_b32_e32 v3, v18, v3 +; GCN-NEXT: v_or_b32_e32 v3, v3, v18 ; GCN-NEXT: v_lshrrev_b16_e64 v18, 5, s6 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x44 @@ -1455,7 +1457,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 ; GCN-NEXT: v_or_b32_e32 v18, v19, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 ; GCN-NEXT: v_and_b32_e32 v18, 3, v18 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x43 ; GCN-NEXT: v_or_b32_e32 v18, v18, v3 @@ -1466,10 +1467,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v3, 1, v3, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v19, 1, v19, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v3, 1, v3 ; GCN-NEXT: v_and_b32_e32 v19, 1, v19 +; GCN-NEXT: v_lshlrev_b16_e32 v3, 3, v3 +; GCN-NEXT: v_lshlrev_b16_e32 v19, 2, v19 ; GCN-NEXT: s_cmpk_lg_i32 s0, 0x41 -; GCN-NEXT: v_or_b32_e32 v3, v19, v3 +; GCN-NEXT: v_or_b32_e32 v3, v3, v19 ; GCN-NEXT: v_lshrrev_b16_e64 v19, 1, s6 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 64 @@ -1480,7 +1482,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v19, 1, v19 ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 ; GCN-NEXT: v_or_b32_e32 v2, v2, v19 -; GCN-NEXT: v_lshlrev_b16_e32 v3, 2, v3 ; GCN-NEXT: v_and_b32_e32 v2, 3, v2 ; GCN-NEXT: v_or_b32_e32 v2, v2, v3 ; GCN-NEXT: v_or_b32_sdwa v3, v15, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -1495,11 +1496,12 @@ ; GCN-NEXT: v_mov_b32_e32 v15, s23 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 ; GCN-NEXT: v_and_b32_e32 v15, 1, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v14, 3, v14 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: s_cmp_lg_u32 s0, 53 ; GCN-NEXT: v_or_b32_sdwa v2, v2, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GCN-NEXT: v_or_b32_e32 v14, v15, v14 +; GCN-NEXT: v_or_b32_e32 v14, v14, v15 ; GCN-NEXT: v_mov_b32_e32 v15, s22 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 52 @@ -1511,7 +1513,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 -; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 ; GCN-NEXT: v_and_b32_e32 v15, 3, v15 ; GCN-NEXT: s_cmp_lg_u32 s0, 51 ; GCN-NEXT: v_or_b32_e32 v14, v15, v14 @@ -1522,10 +1523,11 @@ ; GCN-NEXT: v_mov_b32_e32 v16, s19 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 3, 
v15 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 49 -; GCN-NEXT: v_or_b32_e32 v15, v16, v15 +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: v_mov_b32_e32 v16, s18 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 48 @@ -1536,7 +1538,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 ; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 @@ -1550,10 +1551,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 3, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 61 -; GCN-NEXT: v_or_b32_e32 v15, v16, v15 +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s16 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 60 @@ -1564,7 +1566,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 59 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 @@ -1575,13 +1576,14 @@ ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc +; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: s_cmp_lg_u32 s0, 56 ; GCN-NEXT: v_mov_b32_e32 v13, s16 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 -; GCN-NEXT: v_and_b32_e32 v17, 1, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 3, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 57 -; GCN-NEXT: v_or_b32_e32 v16, v17, v16 +; GCN-NEXT: v_or_b32_e32 v16, v16, v17 ; GCN-NEXT: v_lshrrev_b16_e64 v17, 1, s16 ; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 @@ -1589,12 +1591,11 @@ ; GCN-NEXT: v_and_b32_e32 v13, 1, v13 ; GCN-NEXT: v_lshlrev_b16_e32 v17, 1, v17 ; GCN-NEXT: v_or_b32_e32 v13, v13, v17 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: v_and_b32_e32 v13, 3, v13 ; GCN-NEXT: v_or_b32_e32 v13, v13, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 -; GCN-NEXT: v_and_b32_e32 v13, 15, v13 -; GCN-NEXT: v_or_b32_sdwa v13, v13, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_lshlrev_b16_e32 v15, 12, v15 +; GCN-NEXT: v_and_b32_sdwa v13, v13, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v13, v15, v13 ; GCN-NEXT: s_cmp_lg_u32 s0, 47 ; GCN-NEXT: v_or_b32_sdwa v14, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GCN-NEXT: v_lshrrev_b16_e64 v13, 15, s5 @@ -1604,10 +1605,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 ; GCN-NEXT: v_and_b32_e32 v15, 1, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v13, 3, v13 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: s_cmp_lg_u32 s0, 45 -; GCN-NEXT: v_or_b32_e32 v13, v15, v13 +; GCN-NEXT: v_or_b32_e32 v13, v13, v15 ; GCN-NEXT: v_lshrrev_b16_e64 v15, 13, s5 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 44 @@ -1618,7 +1620,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; 
GCN-NEXT: v_and_b32_e32 v16, 1, v16 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 -; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 ; GCN-NEXT: v_and_b32_e32 v15, 3, v15 ; GCN-NEXT: s_cmp_lg_u32 s0, 43 ; GCN-NEXT: v_or_b32_e32 v13, v15, v13 @@ -1629,10 +1630,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 3, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 41 -; GCN-NEXT: v_or_b32_e32 v15, v16, v15 +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: v_lshrrev_b16_e64 v16, 9, s5 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 40 @@ -1643,13 +1645,12 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 -; GCN-NEXT: v_lshlrev_b16_e32 v13, 4, v13 -; GCN-NEXT: v_and_b32_e32 v15, 15, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v13, 12, v13 +; GCN-NEXT: v_and_b32_sdwa v15, v15, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GCN-NEXT: s_cmp_lg_u32 s0, 39 -; GCN-NEXT: v_or_b32_sdwa v15, v15, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v15, v13, v15 ; GCN-NEXT: v_lshrrev_b16_e64 v13, 7, s5 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 38 @@ -1657,10 +1658,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v13, 3, v13 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 37 -; GCN-NEXT: v_or_b32_e32 v13, v16, v13 +; GCN-NEXT: v_or_b32_e32 v13, v13, v16 ; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s5 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 36 @@ -1671,7 +1673,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 35 ; GCN-NEXT: v_or_b32_e32 v16, v16, v13 @@ -1682,10 +1683,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v13, 3, v13 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: s_cmp_lg_u32 s0, 33 -; GCN-NEXT: v_or_b32_e32 v17, v17, v13 +; GCN-NEXT: v_or_b32_e32 v17, v13, v17 ; GCN-NEXT: v_lshrrev_b16_e64 v13, 1, s5 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 32 @@ -1696,7 +1698,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 ; GCN-NEXT: v_and_b32_e32 v1, 1, v1 ; GCN-NEXT: v_or_b32_e32 v1, v1, v13 -; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 ; GCN-NEXT: v_and_b32_e32 v1, 3, v1 ; GCN-NEXT: v_or_b32_e32 v1, v1, v17 ; GCN-NEXT: v_lshlrev_b16_e32 v16, 4, v16 @@ -1712,10 +1713,11 @@ ; GCN-NEXT: v_mov_b32_e32 v15, s14 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v14, 1, v14 ; GCN-NEXT: v_and_b32_e32 v15, 1, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v14, 3, v14 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; 
GCN-NEXT: s_cmp_lg_u32 s0, 21 -; GCN-NEXT: v_or_b32_e32 v14, v15, v14 +; GCN-NEXT: v_or_b32_e32 v14, v14, v15 ; GCN-NEXT: v_mov_b32_e32 v15, s13 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 20 @@ -1726,7 +1728,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 -; GCN-NEXT: v_lshlrev_b16_e32 v14, 2, v14 ; GCN-NEXT: v_and_b32_e32 v15, 3, v15 ; GCN-NEXT: s_cmp_lg_u32 s0, 19 ; GCN-NEXT: v_or_b32_e32 v14, v15, v14 @@ -1737,10 +1738,11 @@ ; GCN-NEXT: v_mov_b32_e32 v16, s10 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 3, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 17 -; GCN-NEXT: v_or_b32_e32 v15, v16, v15 +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: v_mov_b32_e32 v16, s9 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 16 @@ -1751,7 +1753,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 ; GCN-NEXT: v_or_b32_e32 v16, v18, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 ; GCN-NEXT: v_lshlrev_b16_e32 v14, 4, v14 @@ -1765,10 +1766,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 3, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 29 -; GCN-NEXT: v_or_b32_e32 v15, v16, v15 +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: v_lshrrev_b16_e64 v16, 5, s1 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 28 @@ -1779,7 +1781,6 @@ ; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 ; GCN-NEXT: v_and_b32_e32 v18, 1, v18 ; GCN-NEXT: v_or_b32_e32 v16, v18, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 27 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 @@ -1790,13 +1791,14 @@ ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v18, 1, v18, vcc +; GCN-NEXT: v_and_b32_e32 v18, 1, v18 ; GCN-NEXT: s_cmp_lg_u32 s0, 24 ; GCN-NEXT: v_mov_b32_e32 v17, s1 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 -; GCN-NEXT: v_and_b32_e32 v18, 1, v18 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 3, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v18, 2, v18 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 25 -; GCN-NEXT: v_or_b32_e32 v16, v18, v16 +; GCN-NEXT: v_or_b32_e32 v16, v16, v18 ; GCN-NEXT: v_lshrrev_b16_e64 v18, 1, s1 ; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 @@ -1804,12 +1806,11 @@ ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: v_lshlrev_b16_e32 v18, 1, v18 ; GCN-NEXT: v_or_b32_e32 v17, v17, v18 -; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: v_and_b32_e32 v17, 3, v17 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 -; GCN-NEXT: v_and_b32_e32 v16, 15, v16 -; GCN-NEXT: v_or_b32_sdwa v15, v16, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_lshlrev_b16_e32 v15, 12, v15 +; GCN-NEXT: v_and_b32_sdwa v16, v16, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 15 ; 
GCN-NEXT: v_or_b32_sdwa v14, v14, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GCN-NEXT: v_lshrrev_b16_e64 v15, 15, s4 @@ -1819,10 +1820,11 @@ ; GCN-NEXT: v_cndmask_b32_e32 v15, 1, v15, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v16, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v15, 1, v15 ; GCN-NEXT: v_and_b32_e32 v16, 1, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 3, v15 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 2, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 13 -; GCN-NEXT: v_or_b32_e32 v15, v16, v15 +; GCN-NEXT: v_or_b32_e32 v15, v15, v16 ; GCN-NEXT: v_lshrrev_b16_e64 v16, 13, s4 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 12 @@ -1834,22 +1836,21 @@ ; GCN-NEXT: v_and_b32_e32 v17, 1, v17 ; GCN-NEXT: v_or_b32_e32 v16, v17, v16 ; GCN-NEXT: s_cmp_lg_u32 s0, 11 -; GCN-NEXT: v_lshrrev_b16_e64 v18, 11, s4 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 2, v15 +; GCN-NEXT: v_lshrrev_b16_e64 v17, 11, s4 ; GCN-NEXT: v_and_b32_e32 v16, 3, v16 ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 10 -; GCN-NEXT: v_lshrrev_b16_e64 v13, 10, s4 +; GCN-NEXT: v_lshrrev_b16_e64 v18, 10, s4 ; GCN-NEXT: v_or_b32_e32 v15, v16, v15 -; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v18, vcc +; GCN-NEXT: v_cndmask_b32_e32 v16, 1, v17, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 9 -; GCN-NEXT: v_lshrrev_b16_e64 v12, 9, s4 -; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc +; GCN-NEXT: v_lshrrev_b16_e64 v13, 9, s4 +; GCN-NEXT: v_cndmask_b32_e32 v17, 1, v18, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 8 ; GCN-NEXT: v_lshrrev_b16_e64 v11, 8, s4 -; GCN-NEXT: v_cndmask_b32_e32 v12, 1, v12, vcc +; GCN-NEXT: v_cndmask_b32_e32 v13, 1, v13, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: s_cmp_lg_u32 s0, 7 ; GCN-NEXT: v_lshrrev_b16_e64 v10, 7, s4 @@ -1884,38 +1885,38 @@ ; GCN-NEXT: v_cndmask_b32_e32 v4, 1, v4, vcc ; GCN-NEXT: s_cselect_b64 vcc, -1, 0 ; GCN-NEXT: v_cndmask_b32_e32 v0, 1, v0, vcc -; GCN-NEXT: v_lshlrev_b16_e32 v16, 1, v16 -; GCN-NEXT: v_and_b32_e32 v13, 1, v13 -; GCN-NEXT: v_lshlrev_b16_e32 v12, 1, v12 +; GCN-NEXT: v_and_b32_e32 v17, 1, v17 +; GCN-NEXT: v_lshlrev_b16_e32 v13, 1, v13 ; GCN-NEXT: v_and_b32_e32 v11, 1, v11 -; GCN-NEXT: v_lshlrev_b16_e32 v10, 1, v10 ; GCN-NEXT: v_and_b32_e32 v9, 1, v9 ; GCN-NEXT: v_lshlrev_b16_e32 v8, 1, v8 ; GCN-NEXT: v_and_b32_e32 v7, 1, v7 -; GCN-NEXT: v_lshlrev_b16_e32 v6, 1, v6 ; GCN-NEXT: v_and_b32_e32 v5, 1, v5 ; GCN-NEXT: v_lshlrev_b16_e32 v4, 1, v4 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0 -; GCN-NEXT: v_or_b32_e32 v13, v13, v16 -; GCN-NEXT: v_or_b32_e32 v11, v11, v12 -; GCN-NEXT: v_or_b32_e32 v9, v9, v10 +; GCN-NEXT: v_lshlrev_b16_e32 v16, 3, v16 +; GCN-NEXT: v_lshlrev_b16_e32 v17, 2, v17 +; GCN-NEXT: v_or_b32_e32 v11, v11, v13 +; GCN-NEXT: v_lshlrev_b16_e32 v10, 3, v10 +; GCN-NEXT: v_lshlrev_b16_e32 v9, 2, v9 ; GCN-NEXT: v_or_b32_e32 v7, v7, v8 -; GCN-NEXT: v_or_b32_e32 v5, v5, v6 +; GCN-NEXT: v_lshlrev_b16_e32 v6, 3, v6 +; GCN-NEXT: v_lshlrev_b16_e32 v5, 2, v5 ; GCN-NEXT: v_or_b32_e32 v0, v0, v4 -; GCN-NEXT: v_lshlrev_b16_e32 v13, 2, v13 +; GCN-NEXT: v_or_b32_e32 v16, v16, v17 ; GCN-NEXT: v_and_b32_e32 v11, 3, v11 -; GCN-NEXT: v_lshlrev_b16_e32 v9, 2, v9 +; GCN-NEXT: v_or_b32_e32 v9, v10, v9 ; GCN-NEXT: v_and_b32_e32 v7, 3, v7 -; GCN-NEXT: v_lshlrev_b16_e32 v5, 2, v5 +; GCN-NEXT: v_or_b32_e32 v5, v6, v5 ; GCN-NEXT: v_and_b32_e32 v0, 3, v0 -; GCN-NEXT: v_or_b32_e32 v11, v11, v13 +; GCN-NEXT: v_or_b32_e32 v11, v11, v16 ; GCN-NEXT: v_or_b32_e32 v7, v7, v9 ; GCN-NEXT: 
v_or_b32_e32 v0, v0, v5 -; GCN-NEXT: v_lshlrev_b16_e32 v15, 4, v15 -; GCN-NEXT: v_and_b32_e32 v11, 15, v11 +; GCN-NEXT: v_lshlrev_b16_e32 v15, 12, v15 +; GCN-NEXT: v_and_b32_sdwa v11, v11, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GCN-NEXT: v_lshlrev_b16_e32 v7, 4, v7 ; GCN-NEXT: v_and_b32_e32 v0, 15, v0 -; GCN-NEXT: v_or_b32_sdwa v11, v11, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GCN-NEXT: v_or_b32_e32 v11, v15, v11 ; GCN-NEXT: v_or_b32_e32 v0, v0, v7 ; GCN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GCN-NEXT: v_mov_b32_e32 v5, s3 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1213,13 +1213,13 @@ ; SI-NEXT: s_lshr_b32 s4, s11, 24 ; SI-NEXT: s_cmp_lg_u32 s6, 15 ; SI-NEXT: s_cselect_b32 s4, s4, 5 -; SI-NEXT: s_lshl_b32 s4, s4, 8 +; SI-NEXT: s_lshl_b32 s4, s4, 24 ; SI-NEXT: s_lshr_b32 s5, s11, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 14 ; SI-NEXT: s_cselect_b32 s5, s5, 5 ; SI-NEXT: s_and_b32 s5, s5, 0xff -; SI-NEXT: s_or_b32 s4, s5, s4 -; SI-NEXT: s_lshl_b32 s4, s4, 16 +; SI-NEXT: s_lshl_b32 s5, s5, 16 +; SI-NEXT: s_or_b32 s4, s4, s5 ; SI-NEXT: s_lshr_b32 s5, s11, 8 ; SI-NEXT: s_cmp_lg_u32 s6, 13 ; SI-NEXT: s_cselect_b32 s5, s5, 5 @@ -1233,13 +1233,13 @@ ; SI-NEXT: s_lshr_b32 s5, s10, 24 ; SI-NEXT: s_cmp_lg_u32 s6, 11 ; SI-NEXT: s_cselect_b32 s5, s5, 5 -; SI-NEXT: s_lshl_b32 s5, s5, 8 +; SI-NEXT: s_lshl_b32 s5, s5, 24 ; SI-NEXT: s_lshr_b32 s7, s10, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 10 ; SI-NEXT: s_cselect_b32 s7, s7, 5 ; SI-NEXT: s_and_b32 s7, s7, 0xff -; SI-NEXT: s_or_b32 s5, s7, s5 -; SI-NEXT: s_lshl_b32 s5, s5, 16 +; SI-NEXT: s_lshl_b32 s7, s7, 16 +; SI-NEXT: s_or_b32 s5, s5, s7 ; SI-NEXT: s_lshr_b32 s7, s10, 8 ; SI-NEXT: s_cmp_lg_u32 s6, 9 ; SI-NEXT: s_cselect_b32 s7, s7, 5 @@ -1253,13 +1253,13 @@ ; SI-NEXT: s_lshr_b32 s7, s9, 24 ; SI-NEXT: s_cmp_lg_u32 s6, 7 ; SI-NEXT: s_cselect_b32 s7, s7, 5 -; SI-NEXT: s_lshl_b32 s7, s7, 8 +; SI-NEXT: s_lshl_b32 s7, s7, 24 ; SI-NEXT: s_lshr_b32 s10, s9, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 6 ; SI-NEXT: s_cselect_b32 s10, s10, 5 ; SI-NEXT: s_and_b32 s10, s10, 0xff -; SI-NEXT: s_or_b32 s7, s10, s7 -; SI-NEXT: s_lshl_b32 s7, s7, 16 +; SI-NEXT: s_lshl_b32 s10, s10, 16 +; SI-NEXT: s_or_b32 s7, s7, s10 ; SI-NEXT: s_lshr_b32 s10, s9, 8 ; SI-NEXT: s_cmp_lg_u32 s6, 5 ; SI-NEXT: s_cselect_b32 s10, s10, 5 @@ -1273,13 +1273,13 @@ ; SI-NEXT: s_lshr_b32 s9, s8, 24 ; SI-NEXT: s_cmp_lg_u32 s6, 3 ; SI-NEXT: s_cselect_b32 s9, s9, 5 -; SI-NEXT: s_lshl_b32 s9, s9, 8 +; SI-NEXT: s_lshl_b32 s9, s9, 24 ; SI-NEXT: s_lshr_b32 s10, s8, 16 ; SI-NEXT: s_cmp_lg_u32 s6, 2 ; SI-NEXT: s_cselect_b32 s10, s10, 5 ; SI-NEXT: s_and_b32 s10, s10, 0xff -; SI-NEXT: s_or_b32 s9, s10, s9 -; SI-NEXT: s_lshl_b32 s9, s9, 16 +; SI-NEXT: s_lshl_b32 s10, s10, 16 +; SI-NEXT: s_or_b32 s9, s9, s10 ; SI-NEXT: s_lshr_b32 s10, s8, 8 ; SI-NEXT: s_cmp_lg_u32 s6, 1 ; SI-NEXT: s_cselect_b32 s10, s10, 5 diff --git a/llvm/test/CodeGen/BPF/pr57872.ll b/llvm/test/CodeGen/BPF/pr57872.ll --- a/llvm/test/CodeGen/BPF/pr57872.ll +++ b/llvm/test/CodeGen/BPF/pr57872.ll @@ -1,9 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=bpf-- | FileCheck %s -; XFAIL: * %struct.event = type { i8, [84 x i8] } define void @foo(ptr %g) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: r1 = 
*(u64 *)(r1 + 0) +; CHECK-NEXT: r2 = *(u8 *)(r1 + 83) +; CHECK-NEXT: *(u8 *)(r10 - 4) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 82) +; CHECK-NEXT: *(u8 *)(r10 - 5) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 81) +; CHECK-NEXT: *(u8 *)(r10 - 6) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 80) +; CHECK-NEXT: *(u8 *)(r10 - 7) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 79) +; CHECK-NEXT: *(u8 *)(r10 - 8) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 78) +; CHECK-NEXT: *(u8 *)(r10 - 9) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 77) +; CHECK-NEXT: *(u8 *)(r10 - 10) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 76) +; CHECK-NEXT: *(u8 *)(r10 - 11) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 75) +; CHECK-NEXT: *(u8 *)(r10 - 12) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 74) +; CHECK-NEXT: *(u8 *)(r10 - 13) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 73) +; CHECK-NEXT: *(u8 *)(r10 - 14) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 72) +; CHECK-NEXT: *(u8 *)(r10 - 15) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 71) +; CHECK-NEXT: *(u8 *)(r10 - 16) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 70) +; CHECK-NEXT: *(u8 *)(r10 - 17) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 69) +; CHECK-NEXT: *(u8 *)(r10 - 18) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 68) +; CHECK-NEXT: *(u8 *)(r10 - 19) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 67) +; CHECK-NEXT: *(u8 *)(r10 - 20) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 66) +; CHECK-NEXT: *(u8 *)(r10 - 21) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 65) +; CHECK-NEXT: *(u8 *)(r10 - 22) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 64) +; CHECK-NEXT: *(u8 *)(r10 - 23) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 63) +; CHECK-NEXT: *(u8 *)(r10 - 24) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 62) +; CHECK-NEXT: *(u8 *)(r10 - 25) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 61) +; CHECK-NEXT: *(u8 *)(r10 - 26) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 60) +; CHECK-NEXT: *(u8 *)(r10 - 27) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 59) +; CHECK-NEXT: *(u8 *)(r10 - 28) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 58) +; CHECK-NEXT: *(u8 *)(r10 - 29) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 57) +; CHECK-NEXT: *(u8 *)(r10 - 30) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 56) +; CHECK-NEXT: *(u8 *)(r10 - 31) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 55) +; CHECK-NEXT: *(u8 *)(r10 - 32) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 54) +; CHECK-NEXT: *(u8 *)(r10 - 33) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 53) +; CHECK-NEXT: *(u8 *)(r10 - 34) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 52) +; CHECK-NEXT: *(u8 *)(r10 - 35) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 51) +; CHECK-NEXT: *(u8 *)(r10 - 36) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 50) +; CHECK-NEXT: *(u8 *)(r10 - 37) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 49) +; CHECK-NEXT: *(u8 *)(r10 - 38) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 48) +; CHECK-NEXT: *(u8 *)(r10 - 39) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 47) +; CHECK-NEXT: *(u8 *)(r10 - 40) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 46) +; CHECK-NEXT: *(u8 *)(r10 - 41) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 45) +; CHECK-NEXT: *(u8 *)(r10 - 42) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 44) +; CHECK-NEXT: *(u8 *)(r10 - 43) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 43) +; CHECK-NEXT: *(u8 *)(r10 - 44) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 42) +; CHECK-NEXT: *(u8 *)(r10 - 45) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 41) +; CHECK-NEXT: *(u8 *)(r10 - 46) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 40) +; CHECK-NEXT: *(u8 *)(r10 - 47) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 39) +; CHECK-NEXT: *(u8 *)(r10 - 48) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 38) +; CHECK-NEXT: *(u8 *)(r10 - 49) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 37) +; CHECK-NEXT: *(u8 *)(r10 - 50) = r2 +; CHECK-NEXT: r2 
= *(u8 *)(r1 + 36) +; CHECK-NEXT: *(u8 *)(r10 - 51) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 35) +; CHECK-NEXT: *(u8 *)(r10 - 52) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 34) +; CHECK-NEXT: *(u8 *)(r10 - 53) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 33) +; CHECK-NEXT: *(u8 *)(r10 - 54) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 32) +; CHECK-NEXT: *(u8 *)(r10 - 55) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 31) +; CHECK-NEXT: *(u8 *)(r10 - 56) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 30) +; CHECK-NEXT: *(u8 *)(r10 - 57) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 29) +; CHECK-NEXT: *(u8 *)(r10 - 58) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 28) +; CHECK-NEXT: *(u8 *)(r10 - 59) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 27) +; CHECK-NEXT: *(u8 *)(r10 - 60) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 26) +; CHECK-NEXT: *(u8 *)(r10 - 61) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 25) +; CHECK-NEXT: *(u8 *)(r10 - 62) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 24) +; CHECK-NEXT: *(u8 *)(r10 - 63) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 23) +; CHECK-NEXT: *(u8 *)(r10 - 64) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 22) +; CHECK-NEXT: *(u8 *)(r10 - 65) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 21) +; CHECK-NEXT: *(u8 *)(r10 - 66) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 20) +; CHECK-NEXT: *(u8 *)(r10 - 67) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 19) +; CHECK-NEXT: *(u8 *)(r10 - 68) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 18) +; CHECK-NEXT: *(u8 *)(r10 - 69) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 17) +; CHECK-NEXT: *(u8 *)(r10 - 70) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 16) +; CHECK-NEXT: *(u8 *)(r10 - 71) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 15) +; CHECK-NEXT: *(u8 *)(r10 - 72) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 14) +; CHECK-NEXT: *(u8 *)(r10 - 73) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 13) +; CHECK-NEXT: *(u8 *)(r10 - 74) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 12) +; CHECK-NEXT: *(u8 *)(r10 - 75) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 11) +; CHECK-NEXT: *(u8 *)(r10 - 76) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 10) +; CHECK-NEXT: *(u8 *)(r10 - 77) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 9) +; CHECK-NEXT: *(u8 *)(r10 - 78) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 8) +; CHECK-NEXT: *(u8 *)(r10 - 79) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 7) +; CHECK-NEXT: *(u8 *)(r10 - 80) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 6) +; CHECK-NEXT: *(u8 *)(r10 - 81) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 5) +; CHECK-NEXT: *(u8 *)(r10 - 82) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 4) +; CHECK-NEXT: *(u8 *)(r10 - 83) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 3) +; CHECK-NEXT: *(u8 *)(r10 - 84) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 2) +; CHECK-NEXT: *(u8 *)(r10 - 85) = r2 +; CHECK-NEXT: r2 = *(u8 *)(r1 + 1) +; CHECK-NEXT: *(u8 *)(r10 - 86) = r2 +; CHECK-NEXT: r1 = *(u8 *)(r1 + 0) +; CHECK-NEXT: *(u8 *)(r10 - 87) = r1 +; CHECK-NEXT: r1 = r10 +; CHECK-NEXT: r1 += -88 +; CHECK-NEXT: call bar +; CHECK-NEXT: exit entry: %event = alloca %struct.event, align 1 %hostname = getelementptr inbounds %struct.event, ptr %event, i64 0, i32 1 diff --git a/llvm/test/CodeGen/Mips/cconv/return-struct.ll b/llvm/test/CodeGen/Mips/cconv/return-struct.ll --- a/llvm/test/CodeGen/Mips/cconv/return-struct.ll +++ b/llvm/test/CodeGen/Mips/cconv/return-struct.ll @@ -175,12 +175,12 @@ ; N32-BE: # %bb.0: # %entry ; N32-BE-NEXT: lui $1, %hi(struct_3xi16) ; N32-BE-NEXT: lw $2, %lo(struct_3xi16)($1) -; N32-BE-NEXT: dsll $2, $2, 16 +; N32-BE-NEXT: dsll $2, $2, 32 ; N32-BE-NEXT: addiu $1, $1, %lo(struct_3xi16) ; N32-BE-NEXT: lhu $1, 4($1) -; N32-BE-NEXT: or $1, $1, $2 +; N32-BE-NEXT: dsll $1, $1, 16 ; N32-BE-NEXT: jr $ra -; N32-BE-NEXT: dsll 
$2, $1, 16 +; N32-BE-NEXT: or $2, $2, $1 ; ; N32-LE-LABEL: ret_struct_3xi16: ; N32-LE: # %bb.0: # %entry @@ -200,12 +200,12 @@ ; N64-BE-NEXT: daddiu $1, $1, %hi(struct_3xi16) ; N64-BE-NEXT: dsll $1, $1, 16 ; N64-BE-NEXT: lw $2, %lo(struct_3xi16)($1) -; N64-BE-NEXT: dsll $2, $2, 16 +; N64-BE-NEXT: dsll $2, $2, 32 ; N64-BE-NEXT: daddiu $1, $1, %lo(struct_3xi16) ; N64-BE-NEXT: lhu $1, 4($1) -; N64-BE-NEXT: or $1, $1, $2 +; N64-BE-NEXT: dsll $1, $1, 16 ; N64-BE-NEXT: jr $ra -; N64-BE-NEXT: dsll $2, $1, 16 +; N64-BE-NEXT: or $2, $2, $1 ; ; N64-LE-LABEL: ret_struct_3xi16: ; N64-LE: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll --- a/llvm/test/CodeGen/Mips/cconv/vector.ll +++ b/llvm/test/CodeGen/Mips/cconv/vector.ll @@ -469,14 +469,14 @@ ; MIPS32-NEXT: srl $1, $5, 24 ; MIPS32-NEXT: srl $2, $4, 24 ; MIPS32-NEXT: addu $1, $2, $1 -; MIPS32-NEXT: sll $1, $1, 8 -; MIPS32-NEXT: srl $2, $5, 16 -; MIPS32-NEXT: srl $3, $4, 16 -; MIPS32-NEXT: addu $2, $3, $2 -; MIPS32-NEXT: andi $2, $2, 255 -; MIPS32-NEXT: or $1, $2, $1 ; MIPS32-NEXT: addu $2, $4, $5 -; MIPS32-NEXT: sll $1, $1, 16 +; MIPS32-NEXT: sll $1, $1, 24 +; MIPS32-NEXT: srl $3, $5, 16 +; MIPS32-NEXT: srl $6, $4, 16 +; MIPS32-NEXT: addu $3, $6, $3 +; MIPS32-NEXT: andi $3, $3, 255 +; MIPS32-NEXT: sll $3, $3, 16 +; MIPS32-NEXT: or $1, $1, $3 ; MIPS32-NEXT: andi $2, $2, 255 ; MIPS32-NEXT: srl $3, $5, 8 ; MIPS32-NEXT: srl $4, $4, 8 @@ -495,14 +495,14 @@ ; MIPS64-NEXT: sll $3, $4, 0 ; MIPS64-NEXT: srl $4, $3, 24 ; MIPS64-NEXT: addu $2, $4, $2 -; MIPS64-NEXT: sll $2, $2, 8 -; MIPS64-NEXT: srl $4, $1, 16 -; MIPS64-NEXT: srl $5, $3, 16 -; MIPS64-NEXT: addu $4, $5, $4 -; MIPS64-NEXT: andi $4, $4, 255 -; MIPS64-NEXT: or $2, $4, $2 ; MIPS64-NEXT: addu $4, $3, $1 -; MIPS64-NEXT: sll $2, $2, 16 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: srl $5, $1, 16 +; MIPS64-NEXT: srl $6, $3, 16 +; MIPS64-NEXT: addu $5, $6, $5 +; MIPS64-NEXT: andi $5, $5, 255 +; MIPS64-NEXT: sll $5, $5, 16 +; MIPS64-NEXT: or $2, $2, $5 ; MIPS64-NEXT: andi $4, $4, 255 ; MIPS64-NEXT: srl $1, $1, 8 ; MIPS64-NEXT: srl $3, $3, 8 @@ -592,37 +592,37 @@ define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) { ; MIPS32-LABEL: i8_8: ; MIPS32: # %bb.0: -; MIPS32-NEXT: srl $1, $6, 24 -; MIPS32-NEXT: srl $2, $4, 24 -; MIPS32-NEXT: addu $1, $2, $1 -; MIPS32-NEXT: sll $1, $1, 8 -; MIPS32-NEXT: srl $2, $6, 16 -; MIPS32-NEXT: srl $3, $4, 16 +; MIPS32-NEXT: addu $1, $4, $6 +; MIPS32-NEXT: srl $2, $6, 24 +; MIPS32-NEXT: srl $3, $4, 24 ; MIPS32-NEXT: addu $2, $3, $2 -; MIPS32-NEXT: andi $2, $2, 255 -; MIPS32-NEXT: srl $3, $7, 24 -; MIPS32-NEXT: srl $8, $5, 24 -; MIPS32-NEXT: or $1, $2, $1 -; MIPS32-NEXT: addu $2, $8, $3 -; MIPS32-NEXT: addu $3, $4, $6 -; MIPS32-NEXT: sll $2, $2, 8 -; MIPS32-NEXT: srl $8, $7, 16 -; MIPS32-NEXT: srl $9, $5, 16 -; MIPS32-NEXT: addu $8, $9, $8 -; MIPS32-NEXT: andi $8, $8, 255 -; MIPS32-NEXT: or $8, $8, $2 -; MIPS32-NEXT: sll $1, $1, 16 -; MIPS32-NEXT: andi $2, $3, 255 +; MIPS32-NEXT: andi $1, $1, 255 ; MIPS32-NEXT: srl $3, $6, 8 -; MIPS32-NEXT: srl $4, $4, 8 -; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: srl $8, $4, 8 +; MIPS32-NEXT: addu $3, $8, $3 ; MIPS32-NEXT: sll $3, $3, 8 -; MIPS32-NEXT: or $2, $2, $3 -; MIPS32-NEXT: andi $2, $2, 65535 -; MIPS32-NEXT: addu $3, $5, $7 -; MIPS32-NEXT: or $2, $2, $1 -; MIPS32-NEXT: sll $1, $8, 16 +; MIPS32-NEXT: srl $6, $6, 16 +; MIPS32-NEXT: srl $4, $4, 16 +; MIPS32-NEXT: or $1, $1, $3 +; MIPS32-NEXT: sll $2, $2, 24 +; MIPS32-NEXT: addu $3, $4, $6 ; MIPS32-NEXT: andi $3, $3, 255 +; 
MIPS32-NEXT: sll $3, $3, 16 +; MIPS32-NEXT: srl $4, $7, 24 +; MIPS32-NEXT: srl $6, $5, 24 +; MIPS32-NEXT: or $2, $2, $3 +; MIPS32-NEXT: andi $1, $1, 65535 +; MIPS32-NEXT: addu $3, $6, $4 +; MIPS32-NEXT: addu $4, $5, $7 +; MIPS32-NEXT: sll $3, $3, 24 +; MIPS32-NEXT: srl $6, $7, 16 +; MIPS32-NEXT: srl $8, $5, 16 +; MIPS32-NEXT: addu $6, $8, $6 +; MIPS32-NEXT: andi $6, $6, 255 +; MIPS32-NEXT: sll $6, $6, 16 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: or $1, $3, $6 +; MIPS32-NEXT: andi $3, $4, 255 ; MIPS32-NEXT: srl $4, $7, 8 ; MIPS32-NEXT: srl $5, $5, 8 ; MIPS32-NEXT: addu $4, $5, $4 @@ -635,57 +635,57 @@ ; ; MIPS64-LABEL: i8_8: ; MIPS64: # %bb.0: -; MIPS64-NEXT: dsrl $1, $5, 56 +; MIPS64-NEXT: dsrl $1, $5, 48 ; MIPS64-NEXT: sll $1, $1, 0 -; MIPS64-NEXT: dsrl $2, $4, 56 +; MIPS64-NEXT: dsrl $2, $4, 48 ; MIPS64-NEXT: sll $2, $2, 0 ; MIPS64-NEXT: addu $1, $2, $1 -; MIPS64-NEXT: dsrl $2, $5, 48 -; MIPS64-NEXT: sll $1, $1, 8 +; MIPS64-NEXT: dsrl $2, $5, 56 +; MIPS64-NEXT: andi $1, $1, 255 ; MIPS64-NEXT: sll $2, $2, 0 -; MIPS64-NEXT: dsrl $3, $4, 48 +; MIPS64-NEXT: dsrl $3, $4, 56 ; MIPS64-NEXT: sll $3, $3, 0 ; MIPS64-NEXT: addu $2, $3, $2 -; MIPS64-NEXT: andi $2, $2, 255 ; MIPS64-NEXT: dsrl $3, $5, 40 -; MIPS64-NEXT: or $1, $2, $1 -; MIPS64-NEXT: sll $2, $5, 0 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: sll $1, $1, 16 ; MIPS64-NEXT: sll $3, $3, 0 ; MIPS64-NEXT: dsrl $6, $4, 40 ; MIPS64-NEXT: sll $6, $6, 0 ; MIPS64-NEXT: addu $3, $6, $3 -; MIPS64-NEXT: dsrl $5, $5, 32 -; MIPS64-NEXT: srl $6, $2, 24 +; MIPS64-NEXT: dsrl $6, $5, 32 ; MIPS64-NEXT: sll $7, $4, 0 -; MIPS64-NEXT: srl $8, $7, 24 -; MIPS64-NEXT: addu $6, $8, $6 -; MIPS64-NEXT: sll $1, $1, 16 -; MIPS64-NEXT: sll $3, $3, 8 ; MIPS64-NEXT: sll $5, $5, 0 +; MIPS64-NEXT: srl $8, $5, 24 +; MIPS64-NEXT: srl $9, $7, 24 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: sll $2, $3, 8 +; MIPS64-NEXT: sll $3, $6, 0 ; MIPS64-NEXT: dsrl $4, $4, 32 ; MIPS64-NEXT: sll $4, $4, 0 -; MIPS64-NEXT: addu $4, $4, $5 -; MIPS64-NEXT: andi $4, $4, 255 -; MIPS64-NEXT: or $3, $4, $3 -; MIPS64-NEXT: andi $3, $3, 65535 -; MIPS64-NEXT: or $1, $3, $1 -; MIPS64-NEXT: sll $3, $6, 8 -; MIPS64-NEXT: srl $4, $2, 16 -; MIPS64-NEXT: srl $5, $7, 16 -; MIPS64-NEXT: addu $4, $5, $4 +; MIPS64-NEXT: addu $3, $4, $3 +; MIPS64-NEXT: andi $3, $3, 255 +; MIPS64-NEXT: or $2, $3, $2 +; MIPS64-NEXT: andi $2, $2, 65535 +; MIPS64-NEXT: or $1, $2, $1 +; MIPS64-NEXT: addu $2, $9, $8 +; MIPS64-NEXT: addu $3, $7, $5 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: srl $4, $5, 16 +; MIPS64-NEXT: srl $6, $7, 16 +; MIPS64-NEXT: addu $4, $6, $4 ; MIPS64-NEXT: andi $4, $4, 255 -; MIPS64-NEXT: or $3, $4, $3 -; MIPS64-NEXT: addu $4, $7, $2 +; MIPS64-NEXT: sll $4, $4, 16 ; MIPS64-NEXT: dsll $1, $1, 32 -; MIPS64-NEXT: sll $3, $3, 16 -; MIPS64-NEXT: andi $4, $4, 255 -; MIPS64-NEXT: srl $2, $2, 8 +; MIPS64-NEXT: or $2, $2, $4 +; MIPS64-NEXT: andi $3, $3, 255 +; MIPS64-NEXT: srl $4, $5, 8 ; MIPS64-NEXT: srl $5, $7, 8 -; MIPS64-NEXT: addu $2, $5, $2 -; MIPS64-NEXT: sll $2, $2, 8 -; MIPS64-NEXT: or $2, $4, $2 -; MIPS64-NEXT: andi $2, $2, 65535 -; MIPS64-NEXT: or $2, $2, $3 +; MIPS64-NEXT: addu $4, $5, $4 +; MIPS64-NEXT: sll $4, $4, 8 +; MIPS64-NEXT: or $3, $3, $4 +; MIPS64-NEXT: andi $3, $3, 65535 +; MIPS64-NEXT: or $2, $3, $2 ; MIPS64-NEXT: dsll $2, $2, 32 ; MIPS64-NEXT: dsrl $2, $2, 32 ; MIPS64-NEXT: or $2, $2, $1 @@ -916,84 +916,84 @@ define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) { ; MIPS32-LABEL: i8_16: ; MIPS32: # %bb.0: -; MIPS32-NEXT: lw $1, 24($sp) -; MIPS32-NEXT: srl $2, $1, 24 -; MIPS32-NEXT: 
srl $3, $6, 24 -; MIPS32-NEXT: srl $8, $1, 16 -; MIPS32-NEXT: srl $9, $6, 16 -; MIPS32-NEXT: srl $10, $1, 8 -; MIPS32-NEXT: srl $11, $6, 8 -; MIPS32-NEXT: lw $12, 20($sp) -; MIPS32-NEXT: srl $13, $12, 8 -; MIPS32-NEXT: srl $14, $5, 8 -; MIPS32-NEXT: addu $13, $14, $13 -; MIPS32-NEXT: addu $14, $5, $12 -; MIPS32-NEXT: addu $10, $11, $10 -; MIPS32-NEXT: addu $1, $6, $1 -; MIPS32-NEXT: addu $6, $9, $8 -; MIPS32-NEXT: addu $2, $3, $2 -; MIPS32-NEXT: srl $3, $12, 24 -; MIPS32-NEXT: srl $8, $5, 24 -; MIPS32-NEXT: srl $9, $12, 16 -; MIPS32-NEXT: srl $5, $5, 16 -; MIPS32-NEXT: addu $5, $5, $9 -; MIPS32-NEXT: addu $3, $8, $3 -; MIPS32-NEXT: sll $2, $2, 8 -; MIPS32-NEXT: andi $6, $6, 255 -; MIPS32-NEXT: andi $1, $1, 255 -; MIPS32-NEXT: sll $8, $10, 8 -; MIPS32-NEXT: andi $9, $14, 255 -; MIPS32-NEXT: sll $10, $13, 8 -; MIPS32-NEXT: lw $11, 28($sp) -; MIPS32-NEXT: lw $12, 16($sp) -; MIPS32-NEXT: srl $13, $12, 24 -; MIPS32-NEXT: srl $14, $4, 24 -; MIPS32-NEXT: srl $15, $11, 24 -; MIPS32-NEXT: srl $24, $7, 24 -; MIPS32-NEXT: or $9, $9, $10 -; MIPS32-NEXT: or $1, $1, $8 -; MIPS32-NEXT: or $2, $6, $2 -; MIPS32-NEXT: addu $6, $24, $15 -; MIPS32-NEXT: sll $3, $3, 8 -; MIPS32-NEXT: andi $5, $5, 255 -; MIPS32-NEXT: addu $8, $14, $13 -; MIPS32-NEXT: sll $8, $8, 8 -; MIPS32-NEXT: srl $10, $12, 16 +; MIPS32-NEXT: lw $1, 16($sp) +; MIPS32-NEXT: lw $2, 20($sp) +; MIPS32-NEXT: lw $3, 24($sp) +; MIPS32-NEXT: srl $8, $3, 8 +; MIPS32-NEXT: srl $9, $6, 8 +; MIPS32-NEXT: srl $10, $2, 16 +; MIPS32-NEXT: srl $11, $5, 16 +; MIPS32-NEXT: srl $12, $1, 16 ; MIPS32-NEXT: srl $13, $4, 16 -; MIPS32-NEXT: addu $10, $13, $10 -; MIPS32-NEXT: andi $10, $10, 255 -; MIPS32-NEXT: or $8, $10, $8 -; MIPS32-NEXT: or $3, $5, $3 -; MIPS32-NEXT: addu $5, $4, $12 -; MIPS32-NEXT: sll $6, $6, 8 -; MIPS32-NEXT: srl $10, $11, 16 -; MIPS32-NEXT: srl $13, $7, 16 -; MIPS32-NEXT: addu $10, $13, $10 +; MIPS32-NEXT: srl $14, $1, 8 +; MIPS32-NEXT: srl $15, $4, 8 +; MIPS32-NEXT: addu $24, $6, $3 +; MIPS32-NEXT: addu $14, $15, $14 +; MIPS32-NEXT: addu $15, $4, $1 +; MIPS32-NEXT: addu $12, $13, $12 +; MIPS32-NEXT: addu $10, $11, $10 +; MIPS32-NEXT: srl $11, $2, 24 +; MIPS32-NEXT: addu $13, $5, $2 +; MIPS32-NEXT: addu $8, $9, $8 +; MIPS32-NEXT: srl $1, $1, 24 +; MIPS32-NEXT: srl $4, $4, 24 +; MIPS32-NEXT: srl $9, $5, 24 +; MIPS32-NEXT: srl $25, $3, 24 +; MIPS32-NEXT: srl $gp, $6, 24 +; MIPS32-NEXT: addu $25, $gp, $25 ; MIPS32-NEXT: andi $10, $10, 255 -; MIPS32-NEXT: or $6, $10, $6 -; MIPS32-NEXT: sll $10, $2, 16 -; MIPS32-NEXT: andi $1, $1, 65535 +; MIPS32-NEXT: addu $9, $9, $11 +; MIPS32-NEXT: andi $11, $12, 255 +; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: andi $4, $15, 255 +; MIPS32-NEXT: sll $12, $14, 8 +; MIPS32-NEXT: andi $14, $24, 255 +; MIPS32-NEXT: sll $8, $8, 8 +; MIPS32-NEXT: andi $13, $13, 255 +; MIPS32-NEXT: srl $2, $2, 8 +; MIPS32-NEXT: srl $5, $5, 8 +; MIPS32-NEXT: addu $2, $5, $2 +; MIPS32-NEXT: sll $2, $2, 8 +; MIPS32-NEXT: srl $3, $3, 16 +; MIPS32-NEXT: srl $5, $6, 16 +; MIPS32-NEXT: or $2, $13, $2 +; MIPS32-NEXT: or $6, $14, $8 +; MIPS32-NEXT: or $4, $4, $12 +; MIPS32-NEXT: sll $1, $1, 24 +; MIPS32-NEXT: sll $8, $11, 16 +; MIPS32-NEXT: sll $9, $9, 24 +; MIPS32-NEXT: sll $10, $10, 16 +; MIPS32-NEXT: sll $11, $25, 24 +; MIPS32-NEXT: addu $3, $5, $3 +; MIPS32-NEXT: andi $3, $3, 255 ; MIPS32-NEXT: sll $3, $3, 16 -; MIPS32-NEXT: andi $9, $9, 65535 -; MIPS32-NEXT: sll $2, $8, 16 -; MIPS32-NEXT: andi $5, $5, 255 -; MIPS32-NEXT: srl $8, $12, 8 -; MIPS32-NEXT: srl $4, $4, 8 -; MIPS32-NEXT: addu $4, $4, $8 -; MIPS32-NEXT: sll $4, $4, 8 -; 
MIPS32-NEXT: or $4, $5, $4 +; MIPS32-NEXT: lw $5, 28($sp) +; MIPS32-NEXT: srl $12, $5, 24 +; MIPS32-NEXT: srl $13, $7, 24 +; MIPS32-NEXT: or $11, $11, $3 +; MIPS32-NEXT: or $3, $9, $10 +; MIPS32-NEXT: or $1, $1, $8 ; MIPS32-NEXT: andi $4, $4, 65535 -; MIPS32-NEXT: addu $5, $7, $11 -; MIPS32-NEXT: or $2, $4, $2 +; MIPS32-NEXT: addu $8, $13, $12 +; MIPS32-NEXT: andi $6, $6, 65535 +; MIPS32-NEXT: andi $9, $2, 65535 +; MIPS32-NEXT: addu $10, $7, $5 +; MIPS32-NEXT: sll $8, $8, 24 +; MIPS32-NEXT: srl $2, $5, 16 +; MIPS32-NEXT: srl $12, $7, 16 +; MIPS32-NEXT: addu $2, $12, $2 +; MIPS32-NEXT: andi $2, $2, 255 +; MIPS32-NEXT: sll $12, $2, 16 +; MIPS32-NEXT: or $2, $4, $1 ; MIPS32-NEXT: or $3, $9, $3 -; MIPS32-NEXT: or $4, $1, $10 -; MIPS32-NEXT: sll $1, $6, 16 -; MIPS32-NEXT: andi $5, $5, 255 -; MIPS32-NEXT: srl $6, $11, 8 +; MIPS32-NEXT: or $4, $6, $11 +; MIPS32-NEXT: or $1, $8, $12 +; MIPS32-NEXT: andi $6, $10, 255 +; MIPS32-NEXT: srl $5, $5, 8 ; MIPS32-NEXT: srl $7, $7, 8 -; MIPS32-NEXT: addu $6, $7, $6 -; MIPS32-NEXT: sll $6, $6, 8 -; MIPS32-NEXT: or $5, $5, $6 +; MIPS32-NEXT: addu $5, $7, $5 +; MIPS32-NEXT: sll $5, $5, 8 +; MIPS32-NEXT: or $5, $6, $5 ; MIPS32-NEXT: andi $5, $5, 65535 ; MIPS32-NEXT: or $5, $5, $1 ; MIPS32-NEXT: jr $ra @@ -1001,111 +1001,111 @@ ; ; MIPS64-LABEL: i8_16: ; MIPS64: # %bb.0: -; MIPS64-NEXT: dsrl $1, $7, 56 -; MIPS64-NEXT: dsrl $2, $5, 56 -; MIPS64-NEXT: dsrl $3, $7, 48 -; MIPS64-NEXT: dsrl $8, $5, 48 -; MIPS64-NEXT: dsrl $9, $6, 56 -; MIPS64-NEXT: dsrl $10, $4, 56 -; MIPS64-NEXT: dsrl $11, $7, 32 -; MIPS64-NEXT: sll $1, $1, 0 +; MIPS64-NEXT: sll $1, $6, 0 +; MIPS64-NEXT: dsrl $2, $6, 56 +; MIPS64-NEXT: dsrl $3, $6, 48 +; MIPS64-NEXT: dsrl $8, $4, 48 +; MIPS64-NEXT: srl $9, $1, 16 +; MIPS64-NEXT: sll $10, $4, 0 +; MIPS64-NEXT: srl $11, $10, 16 +; MIPS64-NEXT: dsrl $12, $7, 56 +; MIPS64-NEXT: addu $13, $10, $1 +; MIPS64-NEXT: addu $9, $11, $9 ; MIPS64-NEXT: sll $2, $2, 0 +; MIPS64-NEXT: dsrl $11, $7, 48 +; MIPS64-NEXT: srl $14, $1, 8 +; MIPS64-NEXT: srl $15, $10, 8 +; MIPS64-NEXT: addu $14, $15, $14 +; MIPS64-NEXT: dsrl $15, $4, 56 +; MIPS64-NEXT: dsrl $24, $7, 40 ; MIPS64-NEXT: sll $3, $3, 0 ; MIPS64-NEXT: sll $8, $8, 0 -; MIPS64-NEXT: dsrl $12, $7, 40 -; MIPS64-NEXT: sll $12, $12, 0 -; MIPS64-NEXT: dsrl $13, $5, 40 -; MIPS64-NEXT: sll $13, $13, 0 -; MIPS64-NEXT: addu $12, $13, $12 -; MIPS64-NEXT: addu $3, $8, $3 -; MIPS64-NEXT: addu $1, $2, $1 -; MIPS64-NEXT: sll $2, $9, 0 -; MIPS64-NEXT: sll $8, $10, 0 -; MIPS64-NEXT: dsrl $9, $6, 48 -; MIPS64-NEXT: sll $9, $9, 0 -; MIPS64-NEXT: dsrl $10, $4, 48 -; MIPS64-NEXT: sll $10, $10, 0 -; MIPS64-NEXT: addu $9, $10, $9 -; MIPS64-NEXT: addu $2, $8, $2 -; MIPS64-NEXT: sll $8, $1, 8 -; MIPS64-NEXT: andi $3, $3, 255 -; MIPS64-NEXT: sll $1, $12, 8 -; MIPS64-NEXT: sll $10, $11, 0 -; MIPS64-NEXT: dsrl $11, $5, 32 -; MIPS64-NEXT: sll $11, $11, 0 -; MIPS64-NEXT: addu $10, $11, $10 -; MIPS64-NEXT: andi $10, $10, 255 -; MIPS64-NEXT: or $10, $10, $1 -; MIPS64-NEXT: sll $1, $6, 0 -; MIPS64-NEXT: or $8, $3, $8 -; MIPS64-NEXT: sll $2, $2, 8 +; MIPS64-NEXT: sll $15, $15, 0 ; MIPS64-NEXT: andi $9, $9, 255 -; MIPS64-NEXT: dsrl $11, $6, 40 -; MIPS64-NEXT: srl $3, $1, 24 -; MIPS64-NEXT: sll $12, $4, 0 -; MIPS64-NEXT: srl $13, $12, 24 -; MIPS64-NEXT: srl $14, $1, 16 -; MIPS64-NEXT: srl $15, $12, 16 -; MIPS64-NEXT: andi $10, $10, 65535 -; MIPS64-NEXT: addu $14, $15, $14 -; MIPS64-NEXT: addu $13, $13, $3 -; MIPS64-NEXT: sll $3, $7, 0 -; MIPS64-NEXT: or $2, $9, $2 -; MIPS64-NEXT: sll $7, $8, 16 +; MIPS64-NEXT: addu $2, $15, $2 +; MIPS64-NEXT: andi 
$13, $13, 255 +; MIPS64-NEXT: sll $14, $14, 8 +; MIPS64-NEXT: addu $3, $8, $3 ; MIPS64-NEXT: sll $8, $11, 0 -; MIPS64-NEXT: dsrl $9, $4, 40 -; MIPS64-NEXT: sll $9, $9, 0 -; MIPS64-NEXT: addu $8, $9, $8 +; MIPS64-NEXT: srl $1, $1, 24 +; MIPS64-NEXT: sll $11, $12, 0 +; MIPS64-NEXT: dsrl $12, $5, 56 +; MIPS64-NEXT: dsrl $15, $5, 48 +; MIPS64-NEXT: andi $3, $3, 255 +; MIPS64-NEXT: dsrl $25, $6, 40 +; MIPS64-NEXT: sll $15, $15, 0 +; MIPS64-NEXT: srl $10, $10, 24 +; MIPS64-NEXT: sll $12, $12, 0 +; MIPS64-NEXT: or $13, $13, $14 +; MIPS64-NEXT: sll $14, $24, 0 +; MIPS64-NEXT: sll $2, $2, 24 +; MIPS64-NEXT: addu $11, $12, $11 +; MIPS64-NEXT: sll $9, $9, 16 +; MIPS64-NEXT: addu $1, $10, $1 +; MIPS64-NEXT: addu $8, $15, $8 +; MIPS64-NEXT: sll $10, $25, 0 +; MIPS64-NEXT: dsrl $12, $4, 40 +; MIPS64-NEXT: sll $12, $12, 0 +; MIPS64-NEXT: addu $10, $12, $10 +; MIPS64-NEXT: sll $3, $3, 16 +; MIPS64-NEXT: andi $8, $8, 255 +; MIPS64-NEXT: sll $1, $1, 24 +; MIPS64-NEXT: dsrl $12, $5, 40 +; MIPS64-NEXT: sll $12, $12, 0 ; MIPS64-NEXT: dsrl $6, $6, 32 -; MIPS64-NEXT: srl $9, $3, 24 -; MIPS64-NEXT: sll $5, $5, 0 -; MIPS64-NEXT: srl $11, $5, 24 -; MIPS64-NEXT: or $7, $10, $7 -; MIPS64-NEXT: addu $9, $11, $9 -; MIPS64-NEXT: sll $10, $13, 8 -; MIPS64-NEXT: andi $11, $14, 255 -; MIPS64-NEXT: sll $2, $2, 16 -; MIPS64-NEXT: sll $8, $8, 8 +; MIPS64-NEXT: or $1, $1, $9 +; MIPS64-NEXT: addu $9, $12, $14 +; MIPS64-NEXT: sll $11, $11, 24 +; MIPS64-NEXT: sll $8, $8, 16 +; MIPS64-NEXT: dsrl $12, $7, 32 +; MIPS64-NEXT: andi $13, $13, 65535 +; MIPS64-NEXT: or $2, $2, $3 +; MIPS64-NEXT: sll $3, $10, 8 ; MIPS64-NEXT: sll $6, $6, 0 ; MIPS64-NEXT: dsrl $4, $4, 32 ; MIPS64-NEXT: sll $4, $4, 0 ; MIPS64-NEXT: addu $4, $4, $6 ; MIPS64-NEXT: andi $4, $4, 255 -; MIPS64-NEXT: or $4, $4, $8 -; MIPS64-NEXT: andi $4, $4, 65535 -; MIPS64-NEXT: or $2, $4, $2 -; MIPS64-NEXT: or $4, $11, $10 -; MIPS64-NEXT: addu $6, $12, $1 -; MIPS64-NEXT: sll $8, $9, 8 -; MIPS64-NEXT: srl $9, $3, 16 -; MIPS64-NEXT: srl $10, $5, 16 -; MIPS64-NEXT: addu $9, $10, $9 -; MIPS64-NEXT: andi $9, $9, 255 -; MIPS64-NEXT: or $8, $9, $8 -; MIPS64-NEXT: addu $9, $5, $3 -; MIPS64-NEXT: dsll $2, $2, 32 -; MIPS64-NEXT: sll $4, $4, 16 +; MIPS64-NEXT: or $3, $4, $3 +; MIPS64-NEXT: andi $3, $3, 65535 +; MIPS64-NEXT: or $2, $3, $2 +; MIPS64-NEXT: or $1, $13, $1 +; MIPS64-NEXT: or $3, $11, $8 +; MIPS64-NEXT: sll $4, $9, 8 +; MIPS64-NEXT: sll $6, $12, 0 +; MIPS64-NEXT: dsrl $8, $5, 32 +; MIPS64-NEXT: sll $8, $8, 0 +; MIPS64-NEXT: addu $6, $8, $6 ; MIPS64-NEXT: andi $6, $6, 255 -; MIPS64-NEXT: srl $1, $1, 8 -; MIPS64-NEXT: srl $10, $12, 8 -; MIPS64-NEXT: addu $1, $10, $1 -; MIPS64-NEXT: sll $1, $1, 8 -; MIPS64-NEXT: or $1, $6, $1 -; MIPS64-NEXT: andi $1, $1, 65535 -; MIPS64-NEXT: or $1, $1, $4 +; MIPS64-NEXT: or $4, $6, $4 +; MIPS64-NEXT: andi $4, $4, 65535 ; MIPS64-NEXT: dsll $1, $1, 32 +; MIPS64-NEXT: or $3, $4, $3 +; MIPS64-NEXT: sll $4, $7, 0 +; MIPS64-NEXT: srl $6, $4, 24 +; MIPS64-NEXT: sll $5, $5, 0 +; MIPS64-NEXT: srl $7, $5, 24 +; MIPS64-NEXT: addu $8, $5, $4 +; MIPS64-NEXT: dsll $2, $2, 32 ; MIPS64-NEXT: dsrl $1, $1, 32 +; MIPS64-NEXT: addu $6, $7, $6 +; MIPS64-NEXT: sll $6, $6, 24 +; MIPS64-NEXT: srl $7, $4, 16 +; MIPS64-NEXT: srl $9, $5, 16 +; MIPS64-NEXT: addu $7, $9, $7 +; MIPS64-NEXT: andi $7, $7, 255 +; MIPS64-NEXT: sll $7, $7, 16 ; MIPS64-NEXT: or $2, $1, $2 -; MIPS64-NEXT: dsll $1, $7, 32 -; MIPS64-NEXT: sll $4, $8, 16 -; MIPS64-NEXT: andi $6, $9, 255 -; MIPS64-NEXT: srl $3, $3, 8 +; MIPS64-NEXT: dsll $1, $3, 32 +; MIPS64-NEXT: or $3, $6, $7 +; MIPS64-NEXT: andi $6, 
$8, 255 +; MIPS64-NEXT: srl $4, $4, 8 ; MIPS64-NEXT: srl $5, $5, 8 -; MIPS64-NEXT: addu $3, $5, $3 -; MIPS64-NEXT: sll $3, $3, 8 -; MIPS64-NEXT: or $3, $6, $3 -; MIPS64-NEXT: andi $3, $3, 65535 -; MIPS64-NEXT: or $3, $3, $4 +; MIPS64-NEXT: addu $4, $5, $4 +; MIPS64-NEXT: sll $4, $4, 8 +; MIPS64-NEXT: or $4, $6, $4 +; MIPS64-NEXT: andi $4, $4, 65535 +; MIPS64-NEXT: or $3, $4, $3 ; MIPS64-NEXT: dsll $3, $3, 32 ; MIPS64-NEXT: dsrl $3, $3, 32 ; MIPS64-NEXT: or $3, $3, $1 @@ -6617,24 +6617,24 @@ ; MIPS64R5EB: # %bb.0: # %Entry ; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 ; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EB-NEXT: sh $5, 20($sp) ; MIPS64R5EB-NEXT: dsrl $1, $5, 16 ; MIPS64R5EB-NEXT: sw $1, 16($sp) -; MIPS64R5EB-NEXT: sh $5, 20($sp) +; MIPS64R5EB-NEXT: sh $4, 28($sp) ; MIPS64R5EB-NEXT: dsrl $1, $4, 16 ; MIPS64R5EB-NEXT: sw $1, 24($sp) -; MIPS64R5EB-NEXT: sh $4, 28($sp) -; MIPS64R5EB-NEXT: lb $1, 19($sp) -; MIPS64R5EB-NEXT: dsll $1, $1, 8 -; MIPS64R5EB-NEXT: lbu $2, 20($sp) -; MIPS64R5EB-NEXT: or $1, $1, $2 +; MIPS64R5EB-NEXT: lbu $1, 20($sp) ; MIPS64R5EB-NEXT: dsll $1, $1, 8 -; MIPS64R5EB-NEXT: lb $2, 27($sp) +; MIPS64R5EB-NEXT: lb $2, 19($sp) +; MIPS64R5EB-NEXT: dsll $2, $2, 16 +; MIPS64R5EB-NEXT: or $1, $2, $1 +; MIPS64R5EB-NEXT: lbu $2, 28($sp) ; MIPS64R5EB-NEXT: dsll $2, $2, 8 -; MIPS64R5EB-NEXT: lbu $3, 28($sp) -; MIPS64R5EB-NEXT: or $2, $2, $3 -; MIPS64R5EB-NEXT: lbu $3, 21($sp) -; MIPS64R5EB-NEXT: dsll $2, $2, 8 -; MIPS64R5EB-NEXT: or $1, $3, $1 +; MIPS64R5EB-NEXT: lb $3, 27($sp) +; MIPS64R5EB-NEXT: dsll $3, $3, 16 +; MIPS64R5EB-NEXT: lbu $4, 21($sp) +; MIPS64R5EB-NEXT: or $2, $3, $2 +; MIPS64R5EB-NEXT: or $1, $4, $1 ; MIPS64R5EB-NEXT: lh $3, 16($sp) ; MIPS64R5EB-NEXT: dsll $3, $3, 8 ; MIPS64R5EB-NEXT: lbu $4, 18($sp) diff --git a/llvm/test/CodeGen/Mips/load-store-left-right.ll b/llvm/test/CodeGen/Mips/load-store-left-right.ll --- a/llvm/test/CodeGen/Mips/load-store-left-right.ll +++ b/llvm/test/CodeGen/Mips/load-store-left-right.ll @@ -977,12 +977,12 @@ ; MIPS32-EB-NEXT: addu $gp, $2, $25 ; MIPS32-EB-NEXT: lw $1, %got(arr)($gp) ; MIPS32-EB-NEXT: lwl $4, 0($1) -; MIPS32-EB-NEXT: lwr $4, 3($1) ; MIPS32-EB-NEXT: lbu $2, 5($1) +; MIPS32-EB-NEXT: lwr $4, 3($1) +; MIPS32-EB-NEXT: sll $2, $2, 16 ; MIPS32-EB-NEXT: lbu $3, 4($1) -; MIPS32-EB-NEXT: sll $3, $3, 8 +; MIPS32-EB-NEXT: sll $3, $3, 24 ; MIPS32-EB-NEXT: or $2, $3, $2 -; MIPS32-EB-NEXT: sll $2, $2, 16 ; MIPS32-EB-NEXT: lbu $1, 6($1) ; MIPS32-EB-NEXT: sll $1, $1, 8 ; MIPS32-EB-NEXT: lw $25, %call16(extern_func)($gp) @@ -1046,18 +1046,18 @@ ; MIPS64-EL-NEXT: daddu $1, $1, $25 ; MIPS64-EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(pass_array_byval))) ; MIPS64-EL-NEXT: ld $1, %got_disp(arr)($gp) -; MIPS64-EL-NEXT: lwl $2, 3($1) -; MIPS64-EL-NEXT: lwr $2, 0($1) -; MIPS64-EL-NEXT: daddiu $3, $zero, 1 -; MIPS64-EL-NEXT: dsll $3, $3, 32 -; MIPS64-EL-NEXT: daddiu $3, $3, -1 -; MIPS64-EL-NEXT: and $2, $2, $3 -; MIPS64-EL-NEXT: lbu $3, 4($1) -; MIPS64-EL-NEXT: lbu $4, 5($1) -; MIPS64-EL-NEXT: dsll $4, $4, 8 -; MIPS64-EL-NEXT: or $3, $4, $3 -; MIPS64-EL-NEXT: dsll $3, $3, 32 -; MIPS64-EL-NEXT: or $2, $2, $3 +; MIPS64-EL-NEXT: lbu $2, 4($1) +; MIPS64-EL-NEXT: dsll $2, $2, 32 +; MIPS64-EL-NEXT: lbu $3, 5($1) +; MIPS64-EL-NEXT: dsll $3, $3, 40 +; MIPS64-EL-NEXT: or $2, $3, $2 +; MIPS64-EL-NEXT: lwl $3, 3($1) +; MIPS64-EL-NEXT: lwr $3, 0($1) +; MIPS64-EL-NEXT: daddiu $4, $zero, 1 +; MIPS64-EL-NEXT: dsll $4, $4, 32 +; MIPS64-EL-NEXT: daddiu $4, $4, -1 +; MIPS64-EL-NEXT: and $3, $3, $4 +; MIPS64-EL-NEXT: or $2, $3, $2 ; MIPS64-EL-NEXT: lbu $1, 
6($1) ; MIPS64-EL-NEXT: dsll $1, $1, 48 ; MIPS64-EL-NEXT: ld $25, %call16(extern_func)($gp) @@ -1079,15 +1079,15 @@ ; MIPS64-EB-NEXT: daddu $1, $1, $25 ; MIPS64-EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(pass_array_byval))) ; MIPS64-EB-NEXT: ld $1, %got_disp(arr)($gp) -; MIPS64-EB-NEXT: lwl $2, 0($1) -; MIPS64-EB-NEXT: lwr $2, 3($1) -; MIPS64-EB-NEXT: dsll $2, $2, 32 -; MIPS64-EB-NEXT: lbu $3, 5($1) -; MIPS64-EB-NEXT: lbu $4, 4($1) -; MIPS64-EB-NEXT: dsll $4, $4, 8 -; MIPS64-EB-NEXT: or $3, $4, $3 -; MIPS64-EB-NEXT: dsll $3, $3, 16 -; MIPS64-EB-NEXT: or $2, $2, $3 +; MIPS64-EB-NEXT: lbu $2, 5($1) +; MIPS64-EB-NEXT: dsll $2, $2, 16 +; MIPS64-EB-NEXT: lbu $3, 4($1) +; MIPS64-EB-NEXT: dsll $3, $3, 24 +; MIPS64-EB-NEXT: or $2, $3, $2 +; MIPS64-EB-NEXT: lwl $3, 0($1) +; MIPS64-EB-NEXT: lwr $3, 3($1) +; MIPS64-EB-NEXT: dsll $3, $3, 32 +; MIPS64-EB-NEXT: or $2, $3, $2 ; MIPS64-EB-NEXT: lbu $1, 6($1) ; MIPS64-EB-NEXT: dsll $1, $1, 8 ; MIPS64-EB-NEXT: ld $25, %call16(extern_func)($gp) @@ -1109,15 +1109,15 @@ ; MIPS64R2-EL-NEXT: daddu $1, $1, $25 ; MIPS64R2-EL-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(pass_array_byval))) ; MIPS64R2-EL-NEXT: ld $1, %got_disp(arr)($gp) -; MIPS64R2-EL-NEXT: lwl $2, 3($1) -; MIPS64R2-EL-NEXT: lwr $2, 0($1) -; MIPS64R2-EL-NEXT: dext $2, $2, 0, 32 -; MIPS64R2-EL-NEXT: lbu $3, 4($1) -; MIPS64R2-EL-NEXT: lbu $4, 5($1) -; MIPS64R2-EL-NEXT: dsll $4, $4, 8 -; MIPS64R2-EL-NEXT: or $3, $4, $3 -; MIPS64R2-EL-NEXT: dsll $3, $3, 32 -; MIPS64R2-EL-NEXT: or $2, $2, $3 +; MIPS64R2-EL-NEXT: lbu $2, 4($1) +; MIPS64R2-EL-NEXT: dsll $2, $2, 32 +; MIPS64R2-EL-NEXT: lbu $3, 5($1) +; MIPS64R2-EL-NEXT: dsll $3, $3, 40 +; MIPS64R2-EL-NEXT: or $2, $3, $2 +; MIPS64R2-EL-NEXT: lwl $3, 3($1) +; MIPS64R2-EL-NEXT: lwr $3, 0($1) +; MIPS64R2-EL-NEXT: dext $3, $3, 0, 32 +; MIPS64R2-EL-NEXT: or $2, $3, $2 ; MIPS64R2-EL-NEXT: lbu $1, 6($1) ; MIPS64R2-EL-NEXT: dsll $1, $1, 48 ; MIPS64R2-EL-NEXT: ld $25, %call16(extern_func)($gp) @@ -1140,10 +1140,10 @@ ; MIPS64R2-EB-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(pass_array_byval))) ; MIPS64R2-EB-NEXT: ld $1, %got_disp(arr)($gp) ; MIPS64R2-EB-NEXT: lbu $2, 5($1) +; MIPS64R2-EB-NEXT: dsll $2, $2, 16 ; MIPS64R2-EB-NEXT: lbu $3, 4($1) -; MIPS64R2-EB-NEXT: dsll $3, $3, 8 +; MIPS64R2-EB-NEXT: dsll $3, $3, 24 ; MIPS64R2-EB-NEXT: or $2, $3, $2 -; MIPS64R2-EB-NEXT: dsll $2, $2, 16 ; MIPS64R2-EB-NEXT: lwl $3, 0($1) ; MIPS64R2-EB-NEXT: lwr $3, 3($1) ; MIPS64R2-EB-NEXT: dext $3, $3, 0, 32 diff --git a/llvm/test/CodeGen/Mips/unalignedload.ll b/llvm/test/CodeGen/Mips/unalignedload.ll --- a/llvm/test/CodeGen/Mips/unalignedload.ll +++ b/llvm/test/CodeGen/Mips/unalignedload.ll @@ -43,14 +43,14 @@ ; MIPS32-EB-NEXT: addu $gp, $2, $25 ; MIPS32-EB-NEXT: lw $1, %got(s2)($gp) ; MIPS32-EB-NEXT: lbu $2, 3($1) +; MIPS32-EB-NEXT: sll $2, $2, 16 ; MIPS32-EB-NEXT: lbu $1, 2($1) -; MIPS32-EB-NEXT: sll $1, $1, 8 -; MIPS32-EB-NEXT: or $1, $1, $2 +; MIPS32-EB-NEXT: sll $1, $1, 24 ; MIPS32-EB-NEXT: lw $25, %call16(foo2)($gp) ; MIPS32-EB-NEXT: .reloc ($tmp0), R_MIPS_JALR, foo2 ; MIPS32-EB-NEXT: $tmp0: ; MIPS32-EB-NEXT: jalr $25 -; MIPS32-EB-NEXT: sll $4, $1, 16 +; MIPS32-EB-NEXT: or $4, $1, $2 ; MIPS32-EB-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-EB-NEXT: jr $ra ; MIPS32-EB-NEXT: addiu $sp, $sp, 24 @@ -130,12 +130,12 @@ ; MIPS32-EB-NEXT: addu $gp, $2, $25 ; MIPS32-EB-NEXT: lw $1, %got(s4)($gp) ; MIPS32-EB-NEXT: lwl $4, 0($1) -; MIPS32-EB-NEXT: lwr $4, 3($1) ; MIPS32-EB-NEXT: lbu $2, 5($1) +; MIPS32-EB-NEXT: lwr $4, 3($1) +; MIPS32-EB-NEXT: sll $2, $2, 16 ; MIPS32-EB-NEXT: lbu $3, 4($1) -; 
MIPS32-EB-NEXT: sll $3, $3, 8 +; MIPS32-EB-NEXT: sll $3, $3, 24 ; MIPS32-EB-NEXT: or $2, $3, $2 -; MIPS32-EB-NEXT: sll $2, $2, 16 ; MIPS32-EB-NEXT: lbu $1, 6($1) ; MIPS32-EB-NEXT: sll $1, $1, 8 ; MIPS32-EB-NEXT: lw $25, %call16(foo4)($gp) diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse.ll @@ -1499,19 +1499,17 @@ define i32 @pr55484(i32 %0) { ; RV32I-LABEL: pr55484: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: pr55484: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 8 -; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: slli a1, a0, 40 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ret ; @@ -1533,19 +1531,17 @@ ; ; RV32ZBKB-LABEL: pr55484: ; RV32ZBKB: # %bb.0: -; RV32ZBKB-NEXT: srli a1, a0, 8 -; RV32ZBKB-NEXT: slli a0, a0, 8 -; RV32ZBKB-NEXT: or a0, a1, a0 -; RV32ZBKB-NEXT: slli a0, a0, 16 +; RV32ZBKB-NEXT: slli a1, a0, 8 +; RV32ZBKB-NEXT: slli a0, a0, 24 +; RV32ZBKB-NEXT: or a0, a0, a1 ; RV32ZBKB-NEXT: srai a0, a0, 16 ; RV32ZBKB-NEXT: ret ; ; RV64ZBKB-LABEL: pr55484: ; RV64ZBKB: # %bb.0: -; RV64ZBKB-NEXT: srli a1, a0, 8 -; RV64ZBKB-NEXT: slli a0, a0, 8 -; RV64ZBKB-NEXT: or a0, a1, a0 -; RV64ZBKB-NEXT: slli a0, a0, 48 +; RV64ZBKB-NEXT: slli a1, a0, 40 +; RV64ZBKB-NEXT: slli a0, a0, 56 +; RV64ZBKB-NEXT: or a0, a0, a1 ; RV64ZBKB-NEXT: srai a0, a0, 48 ; RV64ZBKB-NEXT: ret %2 = lshr i32 %0, 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -419,13 +419,13 @@ ; RV32-NEXT: # %bb.1: # %cond.load ; RV32-NEXT: lbu a3, 1(a0) ; RV32-NEXT: lbu a4, 0(a0) -; RV32-NEXT: lbu a5, 3(a0) -; RV32-NEXT: lbu a6, 2(a0) +; RV32-NEXT: lbu a5, 2(a0) +; RV32-NEXT: lbu a6, 3(a0) ; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: or a3, a3, a4 -; RV32-NEXT: slli a4, a5, 8 -; RV32-NEXT: or a4, a4, a6 -; RV32-NEXT: slli a4, a4, 16 +; RV32-NEXT: slli a4, a5, 16 +; RV32-NEXT: slli a5, a6, 24 +; RV32-NEXT: or a4, a5, a4 ; RV32-NEXT: or a3, a4, a3 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 @@ -440,13 +440,13 @@ ; RV32-NEXT: .LBB8_3: # %cond.load1 ; RV32-NEXT: lbu a2, 5(a0) ; RV32-NEXT: lbu a3, 4(a0) -; RV32-NEXT: lbu a4, 7(a0) -; RV32-NEXT: lbu a0, 6(a0) +; RV32-NEXT: lbu a4, 6(a0) +; RV32-NEXT: lbu a0, 7(a0) ; RV32-NEXT: slli a2, a2, 8 ; RV32-NEXT: or a2, a2, a3 -; RV32-NEXT: slli a3, a4, 8 -; RV32-NEXT: or a0, a3, a0 -; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: slli a3, a4, 16 +; RV32-NEXT: slli a0, a0, 24 +; RV32-NEXT: or a0, a0, a3 ; RV32-NEXT: or a0, a0, a2 ; RV32-NEXT: vmv.s.x v9, a0 ; RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma @@ -467,13 +467,13 @@ ; RV64-NEXT: # %bb.1: # %cond.load ; RV64-NEXT: lbu a3, 1(a0) ; RV64-NEXT: lbu a4, 0(a0) -; RV64-NEXT: lb a5, 3(a0) -; RV64-NEXT: lbu a6, 2(a0) +; RV64-NEXT: lbu a5, 2(a0) +; RV64-NEXT: lb a6, 3(a0) ; RV64-NEXT: slli a3, a3, 8 ; RV64-NEXT: or a3, a3, a4 -; RV64-NEXT: slli a4, a5, 8 -; RV64-NEXT: or a4, a4, a6 -; RV64-NEXT: slli a4, a4, 16 +; RV64-NEXT: 
slli a4, a5, 16 +; RV64-NEXT: slli a5, a6, 24 +; RV64-NEXT: or a4, a5, a4 ; RV64-NEXT: or a3, a4, a3 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vmv.v.x v8, a3 @@ -488,13 +488,13 @@ ; RV64-NEXT: .LBB8_3: # %cond.load1 ; RV64-NEXT: lbu a2, 5(a0) ; RV64-NEXT: lbu a3, 4(a0) -; RV64-NEXT: lb a4, 7(a0) -; RV64-NEXT: lbu a0, 6(a0) +; RV64-NEXT: lbu a4, 6(a0) +; RV64-NEXT: lb a0, 7(a0) ; RV64-NEXT: slli a2, a2, 8 ; RV64-NEXT: or a2, a2, a3 -; RV64-NEXT: slli a3, a4, 8 -; RV64-NEXT: or a0, a3, a0 -; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: slli a3, a4, 16 +; RV64-NEXT: slli a0, a0, 24 +; RV64-NEXT: or a0, a0, a3 ; RV64-NEXT: or a0, a0, a2 ; RV64-NEXT: vmv.s.x v9, a0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -392,14 +392,13 @@ ; RV64-NEXT: lb a0, 12(a0) ; RV64-NEXT: lwu a1, 8(s0) ; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: ld a2, 0(s0) +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: slli a0, a0, 29 ; RV64-NEXT: srai s1, a0, 31 -; RV64-NEXT: slli a0, a1, 31 -; RV64-NEXT: srli a1, a2, 33 +; RV64-NEXT: srli a0, a2, 2 +; RV64-NEXT: slli a1, a1, 62 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 31 ; RV64-NEXT: srai a0, a0, 31 ; RV64-NEXT: slli a1, a2, 31 ; RV64-NEXT: srai s2, a1, 31 @@ -428,14 +427,14 @@ ; RV64-NEXT: neg a0, a0 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: slli a3, a1, 29 -; RV64-NEXT: srli a3, a3, 61 -; RV64-NEXT: sb a3, 12(s0) -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: slli a3, a2, 31 -; RV64-NEXT: srli a3, a3, 62 -; RV64-NEXT: or a1, a3, a1 -; RV64-NEXT: sw a1, 8(s0) +; RV64-NEXT: slli a3, a1, 2 +; RV64-NEXT: slli a4, a2, 31 +; RV64-NEXT: srli a4, a4, 62 +; RV64-NEXT: or a3, a4, a3 +; RV64-NEXT: sw a3, 8(s0) +; RV64-NEXT: slli a1, a1, 29 +; RV64-NEXT: srli a1, a1, 61 +; RV64-NEXT: sb a1, 12(s0) ; RV64-NEXT: slli a0, a0, 31 ; RV64-NEXT: srli a0, a0, 31 ; RV64-NEXT: slli a1, a2, 33 @@ -533,68 +532,67 @@ ; ; RV64M-LABEL: test_srem_vec: ; RV64M: # %bb.0: -; RV64M-NEXT: lb a1, 12(a0) +; RV64M-NEXT: ld a1, 0(a0) ; RV64M-NEXT: lwu a2, 8(a0) -; RV64M-NEXT: slli a1, a1, 32 -; RV64M-NEXT: or a1, a2, a1 -; RV64M-NEXT: ld a3, 0(a0) -; RV64M-NEXT: slli a1, a1, 29 -; RV64M-NEXT: srai a1, a1, 31 -; RV64M-NEXT: slli a2, a2, 31 -; RV64M-NEXT: srli a4, a3, 33 +; RV64M-NEXT: srli a3, a1, 2 +; RV64M-NEXT: lb a4, 12(a0) +; RV64M-NEXT: slli a5, a2, 62 +; RV64M-NEXT: or a3, a5, a3 +; RV64M-NEXT: srai a3, a3, 31 +; RV64M-NEXT: slli a4, a4, 32 ; RV64M-NEXT: lui a5, %hi(.LCPI3_0) ; RV64M-NEXT: ld a5, %lo(.LCPI3_0)(a5) -; RV64M-NEXT: or a2, a4, a2 -; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: or a2, a2, a4 +; RV64M-NEXT: slli a2, a2, 29 ; RV64M-NEXT: srai a2, a2, 31 ; RV64M-NEXT: mulh a4, a2, a5 ; RV64M-NEXT: srli a5, a4, 63 ; RV64M-NEXT: srai a4, a4, 1 ; RV64M-NEXT: add a4, a4, a5 -; RV64M-NEXT: slli a5, a4, 3 -; RV64M-NEXT: sub a4, a4, a5 +; RV64M-NEXT: slli a5, a4, 2 +; RV64M-NEXT: add a4, a5, a4 ; RV64M-NEXT: lui a5, %hi(.LCPI3_1) ; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64M-NEXT: slli a3, a3, 31 -; RV64M-NEXT: srai a3, a3, 31 +; RV64M-NEXT: slli a1, a1, 31 +; RV64M-NEXT: srai a1, a1, 31 ; RV64M-NEXT: add a2, a2, a4 -; RV64M-NEXT: mulh a4, a1, a5 +; RV64M-NEXT: mulh a4, a3, a5 ; RV64M-NEXT: srli a5, a4, 63 ; RV64M-NEXT: srai a4, a4, 1 ; RV64M-NEXT: add a4, a4, a5 -; RV64M-NEXT: slli a5, a4, 2 
-; RV64M-NEXT: add a4, a5, a4 -; RV64M-NEXT: add a1, a1, a4 -; RV64M-NEXT: addi a1, a1, -2 -; RV64M-NEXT: seqz a1, a1 +; RV64M-NEXT: slli a5, a4, 3 +; RV64M-NEXT: sub a4, a4, a5 +; RV64M-NEXT: add a3, a3, a4 +; RV64M-NEXT: addi a3, a3, -1 +; RV64M-NEXT: seqz a3, a3 ; RV64M-NEXT: lui a4, %hi(.LCPI3_2) ; RV64M-NEXT: ld a4, %lo(.LCPI3_2)(a4) ; RV64M-NEXT: lui a5, %hi(.LCPI3_3) ; RV64M-NEXT: ld a5, %lo(.LCPI3_3)(a5) -; RV64M-NEXT: addi a2, a2, -1 +; RV64M-NEXT: addi a2, a2, -2 ; RV64M-NEXT: seqz a2, a2 -; RV64M-NEXT: mul a3, a3, a4 -; RV64M-NEXT: add a3, a3, a5 -; RV64M-NEXT: slli a4, a3, 63 -; RV64M-NEXT: srli a3, a3, 1 -; RV64M-NEXT: or a3, a3, a4 -; RV64M-NEXT: sltu a3, a5, a3 +; RV64M-NEXT: mul a1, a1, a4 +; RV64M-NEXT: add a1, a1, a5 +; RV64M-NEXT: slli a4, a1, 63 +; RV64M-NEXT: srli a1, a1, 1 +; RV64M-NEXT: or a1, a1, a4 +; RV64M-NEXT: sltu a1, a5, a1 ; RV64M-NEXT: addi a2, a2, -1 -; RV64M-NEXT: addi a1, a1, -1 -; RV64M-NEXT: neg a3, a3 -; RV64M-NEXT: slli a4, a1, 29 -; RV64M-NEXT: srli a4, a4, 61 -; RV64M-NEXT: sb a4, 12(a0) -; RV64M-NEXT: slli a4, a2, 33 +; RV64M-NEXT: addi a3, a3, -1 +; RV64M-NEXT: neg a1, a1 +; RV64M-NEXT: slli a4, a3, 33 +; RV64M-NEXT: slli a1, a1, 31 +; RV64M-NEXT: srli a1, a1, 31 +; RV64M-NEXT: or a1, a1, a4 +; RV64M-NEXT: sd a1, 0(a0) +; RV64M-NEXT: slli a1, a2, 2 ; RV64M-NEXT: slli a3, a3, 31 -; RV64M-NEXT: srli a3, a3, 31 -; RV64M-NEXT: or a3, a3, a4 -; RV64M-NEXT: sd a3, 0(a0) -; RV64M-NEXT: slli a1, a1, 2 -; RV64M-NEXT: slli a2, a2, 31 -; RV64M-NEXT: srli a2, a2, 62 -; RV64M-NEXT: or a1, a2, a1 +; RV64M-NEXT: srli a3, a3, 62 +; RV64M-NEXT: or a1, a3, a1 ; RV64M-NEXT: sw a1, 8(a0) +; RV64M-NEXT: slli a1, a2, 29 +; RV64M-NEXT: srli a1, a1, 61 +; RV64M-NEXT: sb a1, 12(a0) ; RV64M-NEXT: ret ; ; RV32MV-LABEL: test_srem_vec: @@ -714,49 +712,48 @@ ; RV64MV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64MV-NEXT: addi s0, sp, 64 ; RV64MV-NEXT: andi sp, sp, -32 -; RV64MV-NEXT: lwu a1, 8(a0) -; RV64MV-NEXT: ld a2, 0(a0) -; RV64MV-NEXT: slli a3, a1, 31 -; RV64MV-NEXT: srli a4, a2, 33 -; RV64MV-NEXT: lb a5, 12(a0) -; RV64MV-NEXT: or a3, a4, a3 +; RV64MV-NEXT: lb a1, 12(a0) +; RV64MV-NEXT: lwu a2, 8(a0) +; RV64MV-NEXT: slli a1, a1, 32 +; RV64MV-NEXT: ld a3, 0(a0) +; RV64MV-NEXT: or a1, a2, a1 +; RV64MV-NEXT: slli a1, a1, 29 +; RV64MV-NEXT: srai a1, a1, 31 +; RV64MV-NEXT: srli a4, a3, 2 +; RV64MV-NEXT: slli a2, a2, 62 +; RV64MV-NEXT: lui a5, %hi(.LCPI3_0) +; RV64MV-NEXT: ld a5, %lo(.LCPI3_0)(a5) +; RV64MV-NEXT: or a2, a2, a4 ; RV64MV-NEXT: slli a3, a3, 31 ; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: slli a4, a5, 32 -; RV64MV-NEXT: or a1, a1, a4 -; RV64MV-NEXT: lui a4, %hi(.LCPI3_0) -; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4) -; RV64MV-NEXT: slli a1, a1, 29 -; RV64MV-NEXT: slli a2, a2, 31 -; RV64MV-NEXT: srai a2, a2, 31 -; RV64MV-NEXT: mulh a4, a2, a4 +; RV64MV-NEXT: mulh a4, a3, a5 ; RV64MV-NEXT: srli a5, a4, 63 ; RV64MV-NEXT: add a4, a4, a5 ; RV64MV-NEXT: li a5, 6 ; RV64MV-NEXT: mul a4, a4, a5 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64MV-NEXT: srai a1, a1, 31 -; RV64MV-NEXT: sub a2, a2, a4 -; RV64MV-NEXT: sd a2, 0(sp) -; RV64MV-NEXT: mulh a2, a1, a5 -; RV64MV-NEXT: srli a4, a2, 63 -; RV64MV-NEXT: srai a2, a2, 1 -; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: slli a4, a2, 2 +; RV64MV-NEXT: srai a2, a2, 31 +; RV64MV-NEXT: sub a3, a3, a4 +; RV64MV-NEXT: sd a3, 0(sp) +; RV64MV-NEXT: mulh a3, a2, a5 +; RV64MV-NEXT: srli a4, a3, 63 +; RV64MV-NEXT: srai a3, a3, 1 +; RV64MV-NEXT: add a3, a3, a4 +; RV64MV-NEXT: slli a4, a3, 3 ; RV64MV-NEXT: 
lui a5, %hi(.LCPI3_2) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) -; RV64MV-NEXT: add a2, a4, a2 +; RV64MV-NEXT: sub a3, a3, a4 +; RV64MV-NEXT: add a2, a2, a3 +; RV64MV-NEXT: sd a2, 8(sp) +; RV64MV-NEXT: mulh a2, a1, a5 +; RV64MV-NEXT: srli a3, a2, 63 +; RV64MV-NEXT: srai a2, a2, 1 +; RV64MV-NEXT: add a2, a2, a3 +; RV64MV-NEXT: slli a3, a2, 2 +; RV64MV-NEXT: add a2, a3, a2 ; RV64MV-NEXT: add a1, a1, a2 ; RV64MV-NEXT: sd a1, 16(sp) -; RV64MV-NEXT: mulh a1, a3, a5 -; RV64MV-NEXT: srli a2, a1, 63 -; RV64MV-NEXT: srai a1, a1, 1 -; RV64MV-NEXT: add a1, a1, a2 -; RV64MV-NEXT: slli a2, a1, 3 -; RV64MV-NEXT: sub a1, a1, a2 -; RV64MV-NEXT: add a1, a3, a1 -; RV64MV-NEXT: sd a1, 8(sp) ; RV64MV-NEXT: mv a1, sp ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64MV-NEXT: vle64.v v8, (a1) diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -64,13 +64,13 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a1, 1(a0) ; RV32I-NEXT: lbu a2, 0(a0) -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu a0, 2(a0) +; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a3, 8 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: slli a2, a3, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; @@ -78,13 +78,13 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 0(a0) -; RV64I-NEXT: lb a3, 3(a0) -; RV64I-NEXT: lbu a0, 2(a0) +; RV64I-NEXT: lbu a3, 2(a0) +; RV64I-NEXT: lb a0, 3(a0) ; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a3, 8 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: slli a2, a3, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; @@ -101,23 +101,23 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a1, 1(a0) ; RV32I-NEXT: lbu a2, 0(a0) -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu a4, 2(a0) +; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: lbu a4, 3(a0) ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a3, 8 -; RV32I-NEXT: or a2, a2, a4 -; RV32I-NEXT: slli a2, a2, 16 +; RV32I-NEXT: slli a2, a3, 16 +; RV32I-NEXT: slli a3, a4, 24 +; RV32I-NEXT: or a2, a3, a2 ; RV32I-NEXT: or a2, a2, a1 ; RV32I-NEXT: lbu a1, 5(a0) ; RV32I-NEXT: lbu a3, 4(a0) -; RV32I-NEXT: lbu a4, 7(a0) -; RV32I-NEXT: lbu a0, 6(a0) +; RV32I-NEXT: lbu a4, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: slli a3, a4, 8 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: slli a3, a4, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a0, a0, a3 ; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret @@ -126,23 +126,23 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 0(a0) -; RV64I-NEXT: lbu a3, 3(a0) -; RV64I-NEXT: lbu a4, 2(a0) +; RV64I-NEXT: lbu a3, 2(a0) +; RV64I-NEXT: lbu a4, 3(a0) ; RV64I-NEXT: slli a1, a1, 8 ; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a3, 8 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: slli a2, a3, 16 +; RV64I-NEXT: slli a3, a4, 24 +; RV64I-NEXT: or a2, a3, a2 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: lbu a2, 5(a0) ; RV64I-NEXT: lbu a3, 4(a0) -; RV64I-NEXT: lbu a4, 7(a0) -; RV64I-NEXT: lbu a0, 6(a0) +; 
RV64I-NEXT: lbu a4, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a2, a2, 8 ; RV64I-NEXT: or a2, a2, a3 -; RV64I-NEXT: slli a3, a4, 8 -; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: slli a0, a0, 16 +; RV64I-NEXT: slli a3, a4, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a1 diff --git a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll --- a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -73,46 +73,41 @@ define void @fun2(<8 x i32> %src, ptr %p) ; CHECK-LABEL: fun2: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r14, %r15, 112(%r15) -; CHECK-NEXT: .cfi_offset %r14, -48 -; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: vlgvf %r1, %v26, 3 -; CHECK-NEXT: vlgvf %r0, %v26, 2 +; CHECK-NEXT: vlgvf %r5, %v24, 0 +; CHECK-NEXT: vlgvf %r3, %v24, 1 +; CHECK-NEXT: srlk %r0, %r1, 8 +; CHECK-NEXT: sllg %r5, %r5, 33 +; CHECK-NEXT: sth %r0, 28(%r2) +; CHECK-NEXT: rosbg %r5, %r3, 31, 55, 2 +; CHECK-NEXT: vlgvf %r0, %v24, 2 +; CHECK-NEXT: sllg %r4, %r3, 58 +; CHECK-NEXT: vlgvf %r3, %v26, 2 ; CHECK-NEXT: stc %r1, 30(%r2) -; CHECK-NEXT: srlk %r3, %r1, 8 +; CHECK-NEXT: rosbg %r4, %r0, 6, 36, 27 ; CHECK-NEXT: risbgn %r1, %r1, 33, 167, 0 -; CHECK-NEXT: vlgvf %r5, %v24, 2 -; CHECK-NEXT: rosbg %r1, %r0, 2, 32, 31 -; CHECK-NEXT: sth %r3, 28(%r2) +; CHECK-NEXT: rosbg %r1, %r3, 2, 32, 31 ; CHECK-NEXT: srlg %r1, %r1, 24 -; CHECK-NEXT: vlgvf %r3, %v24, 3 +; CHECK-NEXT: rosbg %r5, %r4, 56, 63, 8 +; CHECK-NEXT: vlgvf %r4, %v24, 3 ; CHECK-NEXT: st %r1, 24(%r2) ; CHECK-NEXT: vlgvf %r1, %v26, 0 -; CHECK-NEXT: risbgn %r14, %r5, 6, 164, 27 -; CHECK-NEXT: sllg %r4, %r3, 60 -; CHECK-NEXT: rosbg %r14, %r3, 37, 63, 60 -; CHECK-NEXT: sllg %r3, %r14, 8 -; CHECK-NEXT: rosbg %r4, %r1, 4, 34, 29 -; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8 -; CHECK-NEXT: stg %r3, 8(%r2) -; CHECK-NEXT: vlgvf %r3, %v24, 1 -; CHECK-NEXT: sllg %r4, %r3, 58 -; CHECK-NEXT: rosbg %r4, %r5, 6, 36, 27 -; CHECK-NEXT: vlgvf %r5, %v24, 0 -; CHECK-NEXT: sllg %r5, %r5, 25 -; CHECK-NEXT: rosbg %r5, %r3, 39, 63, 58 -; CHECK-NEXT: sllg %r3, %r5, 8 -; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8 -; CHECK-NEXT: stg %r3, 0(%r2) -; CHECK-NEXT: vlgvf %r3, %v26, 1 -; CHECK-NEXT: sllg %r4, %r3, 62 -; CHECK-NEXT: rosbg %r4, %r0, 2, 32, 31 -; CHECK-NEXT: risbgn %r0, %r1, 4, 162, 29 -; CHECK-NEXT: rosbg %r0, %r3, 35, 63, 62 +; CHECK-NEXT: risbgn %r0, %r0, 6, 164, 27 +; CHECK-NEXT: rosbg %r0, %r4, 37, 63, 60 +; CHECK-NEXT: stg %r5, 0(%r2) +; CHECK-NEXT: sllg %r5, %r4, 60 ; CHECK-NEXT: sllg %r0, %r0, 8 +; CHECK-NEXT: rosbg %r5, %r1, 4, 34, 29 +; CHECK-NEXT: risbgn %r1, %r1, 4, 162, 29 +; CHECK-NEXT: rosbg %r0, %r5, 56, 63, 8 +; CHECK-NEXT: stg %r0, 8(%r2) +; CHECK-NEXT: vlgvf %r0, %v26, 1 +; CHECK-NEXT: sllg %r4, %r0, 62 +; CHECK-NEXT: rosbg %r1, %r0, 35, 63, 62 +; CHECK-NEXT: sllg %r0, %r1, 8 +; CHECK-NEXT: rosbg %r4, %r3, 2, 32, 31 ; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8 ; CHECK-NEXT: stg %r0, 16(%r2) -; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 { %tmp = trunc <8 x i32> %src to <8 x i31> diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll @@ -31,12 +31,12 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: ldr r1, .LCPI1_0 ; CHECK-NEXT: muls r1, r0, r1 -; CHECK-NEXT: lsls r0, r1, #26 +; 
CHECK-NEXT: lsls r0, r1, #31 ; CHECK-NEXT: ldr r2, .LCPI1_1 ; CHECK-NEXT: ands r2, r1 ; CHECK-NEXT: lsrs r1, r2, #1 -; CHECK-NEXT: adds r0, r1, r0 -; CHECK-NEXT: lsls r0, r0, #5 +; CHECK-NEXT: lsls r1, r1, #5 +; CHECK-NEXT: adds r0, r0, r1 ; CHECK-NEXT: ldr r1, .LCPI1_2 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: blo .LBB1_2 diff --git a/llvm/test/CodeGen/X86/bool-vector.ll b/llvm/test/CodeGen/X86/bool-vector.ll --- a/llvm/test/CodeGen/X86/bool-vector.ll +++ b/llvm/test/CodeGen/X86/bool-vector.ll @@ -13,10 +13,10 @@ ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah -; X86-NEXT: addb %ah, %ah +; X86-NEXT: shlb $3, %ah ; X86-NEXT: andb $1, %cl -; X86-NEXT: orb %ah, %cl ; X86-NEXT: shlb $2, %cl +; X86-NEXT: orb %ah, %cl ; X86-NEXT: addb %dl, %dl ; X86-NEXT: andb $1, %al ; X86-NEXT: orb %dl, %al @@ -28,10 +28,10 @@ ; ; X64-LABEL: PR15215_bad: ; X64: # %bb.0: # %entry -; X64-NEXT: addb %cl, %cl +; X64-NEXT: shlb $3, %cl ; X64-NEXT: andb $1, %dl -; X64-NEXT: orb %cl, %dl ; X64-NEXT: shlb $2, %dl +; X64-NEXT: orb %cl, %dl ; X64-NEXT: addb %sil, %sil ; X64-NEXT: andb $1, %dil ; X64-NEXT: orb %sil, %dil diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll --- a/llvm/test/CodeGen/X86/combine-bitreverse.ll +++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll @@ -233,13 +233,13 @@ ; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 ; X86-NEXT: shrl $2, %eax ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-NEXT: leal (%eax,%ecx,4), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $5592405, %ecx # imm = 0x555555 -; X86-NEXT: shrl %eax -; X86-NEXT: andl $22369621, %eax # imm = 0x1555555 -; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: shll $7, %eax +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: andl $5592405, %eax # imm = 0x555555 +; X86-NEXT: shll $6, %ecx +; X86-NEXT: andl $-1431655808, %ecx # imm = 0xAAAAAA80 +; X86-NEXT: shll $8, %eax +; X86-NEXT: orl %ecx, %eax ; X86-NEXT: bswapl %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $986895, %ecx # imm = 0xF0F0F @@ -276,22 +276,22 @@ ; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5592405, %ecx # imm = 0x555555 -; X64-NEXT: shrl %eax -; X64-NEXT: andl $22369621, %eax # imm = 0x1555555 -; X64-NEXT: leal (%rax,%rcx,2), %eax -; X64-NEXT: shll $7, %eax -; X64-NEXT: bswapl %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $986895, %ecx # imm = 0xF0F0F -; X64-NEXT: shll $4, %ecx -; X64-NEXT: shrl $4, %eax -; X64-NEXT: andl $135204623, %eax # imm = 0x80F0F0F -; X64-NEXT: orl %ecx, %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $3355443, %ecx # imm = 0x333333 -; X64-NEXT: shrl $2, %eax -; X64-NEXT: andl $36909875, %eax # imm = 0x2333333 -; X64-NEXT: leal (%rax,%rcx,4), %eax +; X64-NEXT: shll $6, %eax +; X64-NEXT: andl $-1431655808, %eax # imm = 0xAAAAAA80 +; X64-NEXT: shll $8, %ecx +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: bswapl %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: andl $986895, %eax # imm = 0xF0F0F +; X64-NEXT: shll $4, %eax +; X64-NEXT: shrl $4, %ecx +; X64-NEXT: andl $135204623, %ecx # imm = 0x80F0F0F +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: andl $3355443, %eax # imm = 0x333333 +; X64-NEXT: shrl $2, %ecx +; X64-NEXT: andl $36909875, %ecx # imm = 0x2333333 +; X64-NEXT: leal (%rcx,%rax,4), %eax ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555 ; X64-NEXT: shrl %eax @@ -322,10 +322,8 @@ ; 
X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $357913941, %ecx # imm = 0x15555555 -; X86-NEXT: shrl %eax -; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: addl %eax, %eax +; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: bswapl %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $235867919, %ecx # imm = 0xE0F0F0F diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll --- a/llvm/test/CodeGen/X86/is_fpclass.ll +++ b/llvm/test/CodeGen/X86/is_fpclass.ll @@ -857,12 +857,13 @@ ; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-32-NEXT: sahf ; CHECK-32-NEXT: setp %dh +; CHECK-32-NEXT: shlb $2, %dh ; CHECK-32-NEXT: fucomp %st(0) ; CHECK-32-NEXT: fnstsw %ax ; CHECK-32-NEXT: # kill: def $ah killed $ah killed $ax ; CHECK-32-NEXT: sahf ; CHECK-32-NEXT: setp %dl -; CHECK-32-NEXT: addb %dl, %dl +; CHECK-32-NEXT: shlb $3, %dl ; CHECK-32-NEXT: orb %dh, %dl ; CHECK-32-NEXT: fucomp %st(0) ; CHECK-32-NEXT: fnstsw %ax @@ -876,7 +877,6 @@ ; CHECK-32-NEXT: setp %al ; CHECK-32-NEXT: addb %al, %al ; CHECK-32-NEXT: orb %dh, %al -; CHECK-32-NEXT: shlb $2, %al ; CHECK-32-NEXT: orb %dl, %al ; CHECK-32-NEXT: movb %al, (%ecx) ; CHECK-32-NEXT: movl %ecx, %eax @@ -903,11 +903,12 @@ ; CHECK-32-NEXT: andl %ecx, %edx ; CHECK-32-NEXT: cmpl $2139095041, %edx # imm = 0x7F800001 ; CHECK-32-NEXT: setge %dh +; CHECK-32-NEXT: shlb $2, %dh ; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-32-NEXT: andl %ecx, %esi ; CHECK-32-NEXT: cmpl $2139095041, %esi # imm = 0x7F800001 ; CHECK-32-NEXT: setge %dl -; CHECK-32-NEXT: addb %dl, %dl +; CHECK-32-NEXT: shlb $3, %dl ; CHECK-32-NEXT: orb %dh, %dl ; CHECK-32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-32-NEXT: andl %ecx, %esi @@ -918,7 +919,6 @@ ; CHECK-32-NEXT: setge %cl ; CHECK-32-NEXT: addb %cl, %cl ; CHECK-32-NEXT: orb %dh, %cl -; CHECK-32-NEXT: shlb $2, %cl ; CHECK-32-NEXT: orb %dl, %cl ; CHECK-32-NEXT: movb %cl, (%eax) ; CHECK-32-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -3519,11 +3519,11 @@ ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movl 8(%rdi), %ecx -; SSE2-NEXT: shll $13, %ecx +; SSE2-NEXT: shll $28, %ecx ; SSE2-NEXT: movq %rax, %rdx ; SSE2-NEXT: shrq $51, %rdx -; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: shll $15, %edx +; SSE2-NEXT: orl %ecx, %edx ; SSE2-NEXT: sarl $15, %edx ; SSE2-NEXT: movd %edx, %xmm1 ; SSE2-NEXT: shrq $34, %rax @@ -3548,11 +3548,11 @@ ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSSE3-NEXT: movl 8(%rdi), %ecx -; SSSE3-NEXT: shll $13, %ecx +; SSSE3-NEXT: shll $28, %ecx ; SSSE3-NEXT: movq %rax, %rdx ; SSSE3-NEXT: shrq $51, %rdx -; SSSE3-NEXT: orl %ecx, %edx ; SSSE3-NEXT: shll $15, %edx +; SSSE3-NEXT: orl %ecx, %edx ; SSSE3-NEXT: sarl $15, %edx ; SSSE3-NEXT: movd %edx, %xmm1 ; SSSE3-NEXT: shrq $34, %rax @@ -3581,10 +3581,10 @@ ; SSE41-NEXT: sarl $15, %ecx ; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 ; SSE41-NEXT: movl 8(%rdi), %ecx -; SSE41-NEXT: shll $13, %ecx +; SSE41-NEXT: shll $28, %ecx ; SSE41-NEXT: shrq $51, %rax -; SSE41-NEXT: orl %ecx, %eax ; SSE41-NEXT: shll $15, %eax +; SSE41-NEXT: orl %ecx, %eax ; SSE41-NEXT: sarl $15, %eax ; SSE41-NEXT: pinsrd $3, %eax, %xmm0 ; SSE41-NEXT: retq @@ -3607,10 +3607,10 
@@ ; AVX-NEXT: sarl $15, %ecx ; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ; AVX-NEXT: movl 8(%rdi), %ecx -; AVX-NEXT: shll $13, %ecx +; AVX-NEXT: shll $28, %ecx ; AVX-NEXT: shrq $51, %rax -; AVX-NEXT: orl %ecx, %eax ; AVX-NEXT: shll $15, %eax +; AVX-NEXT: orl %ecx, %eax ; AVX-NEXT: sarl $15, %eax ; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq