Index: llvm/include/llvm/CodeGen/MachineScheduler.h =================================================================== --- llvm/include/llvm/CodeGen/MachineScheduler.h +++ llvm/include/llvm/CodeGen/MachineScheduler.h @@ -792,7 +792,7 @@ /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason : uint8_t { - NoCand, Only1, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, + NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; @@ -926,7 +926,7 @@ const TargetRegisterInfo *TRI, const MachineFunction &MF); unsigned getWeakLeft(const SUnit *SU, bool isTop); -int biasPhysRegCopy(const SUnit *SU, bool isTop); +int biasPhysReg(const SUnit *SU, bool isTop); /// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. @@ -1004,7 +1004,7 @@ const RegPressureTracker &RPTracker, SchedCandidate &Candidate); - void reschedulePhysRegCopies(SUnit *SU, bool isTop); + void reschedulePhysReg(SUnit *SU, bool isTop); }; /// PostGenericScheduler - Interface to the scheduling algorithm used by Index: llvm/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/lib/CodeGen/MachineScheduler.cpp +++ llvm/lib/CodeGen/MachineScheduler.cpp @@ -2515,7 +2515,7 @@ switch (Reason) { case NoCand: return "NOCAND "; case Only1: return "ONLY1 "; - case PhysRegCopy: return "PREG-COPY "; + case PhysReg: return "PHYS-REG "; case RegExcess: return "REG-EXCESS"; case RegCritical: return "REG-CRIT "; case Stall: return "STALL "; @@ -2851,24 +2851,33 @@ /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled /// with the operation that produces or consumes the physreg. We'll do this when /// regalloc has support for parallel copies. -int biasPhysRegCopy(const SUnit *SU, bool isTop) { +int biasPhysReg(const SUnit *SU, bool isTop) { const MachineInstr *MI = SU->getInstr(); - if (!MI->isCopy()) - return 0; - unsigned ScheduledOper = isTop ? 1 : 0; - unsigned UnscheduledOper = isTop ? 0 : 1; - // If we have already scheduled the physreg produce/consumer, immediately - // schedule the copy. - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(ScheduledOper).getReg())) - return 1; - // If the physreg is at the boundary, defer it. Otherwise schedule it - // immediately to free the dependent. We can hoist the copy later. - bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; - if (TargetRegisterInfo::isPhysicalRegister( - MI->getOperand(UnscheduledOper).getReg())) - return AtBoundary ? -1 : 1; + if (MI->isCopy()) { + unsigned ScheduledOper = isTop ? 1 : 0; + unsigned UnscheduledOper = isTop ? 0 : 1; + // If we have already scheduled the physreg produce/consumer, immediately + // schedule the copy. + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(ScheduledOper).getReg())) + return 1; + // If the physreg is at the boundary, defer it. Otherwise schedule it + // immediately to free the dependent. We can hoist the copy later. + bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(UnscheduledOper).getReg())) + return AtBoundary ? 
-1 : 1;
+  }
+
+  if (MI->isMoveImmediate()) {
+    // If we have a move immediate and all successors have been assigned, bias
+    // towards scheduling this later.
+    if (MI->getOperand(0).isReg() &&
+        TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg()))
+      return isTop ? -1 : 1;
+  }
+
   return 0;
 }
 
 } // end namespace llvm
@@ -2929,9 +2938,10 @@
     return;
   }
 
-  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
-                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
-                 TryCand, Cand, PhysRegCopy))
+  // Bias PhysReg defs and copies to their uses and definitions, respectively.
+  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+                 biasPhysReg(Cand.SU, Cand.AtTop),
+                 TryCand, Cand, PhysReg))
     return;
 
   // Avoid exceeding the target's limit.
@@ -3178,7 +3188,7 @@
   return SU;
 }
 
-void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
   MachineBasicBlock::iterator InsertPos = SU->getInstr();
   if (!isTop)
     ++InsertPos;
@@ -3193,10 +3203,10 @@
     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
       continue;
     MachineInstr *Copy = DepSU->getInstr();
-    if (!Copy->isCopy())
+    if (!Copy->isCopy() && !Copy->isMoveImmediate())
       continue;
     LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
               DAG->dumpNode(*Dep.getSUnit()));
     DAG->moveInstruction(Copy, InsertPos);
   }
 }
@@ -3207,18 +3217,18 @@
 /// does.
 ///
 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
-/// them here. See comments in biasPhysRegCopy.
+/// them here. See comments in biasPhysReg.
 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
     Top.bumpNode(SU);
     if (SU->hasPhysRegUses)
-      reschedulePhysRegCopies(SU, true);
+      reschedulePhysReg(SU, true);
   } else {
     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
     Bot.bumpNode(SU);
     if (SU->hasPhysRegDefs)
-      reschedulePhysRegCopies(SU, false);
+      reschedulePhysReg(SU, false);
   }
 }
 
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -148,7 +148,7 @@
 // These instructions XOR the frame pointer into a GPR. They are used in some
 // stack protection schemes. These are post-RA pseudos because we only know the
 // frame register after register allocation.
-let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in {
+let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in {
   def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
                   "xorl\t$$FP, $src", []>,
                 Requires<[NotLP64]>, Sched<[WriteALU]>;
@@ -275,7 +275,7 @@
 // Alias instruction mapping movr0 to xor.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isPseudo = 1, AddedComplexity = 10 in + isPseudo = 1, isMoveImm = 1, AddedComplexity = 10 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)]>, Sched<[WriteZero]>; Index: llvm/lib/Target/X86/X86InstrInfo.td =================================================================== --- llvm/lib/Target/X86/X86InstrInfo.td +++ llvm/lib/Target/X86/X86InstrInfo.td @@ -1493,7 +1493,7 @@ "mov{q}\t{$src, $dst|$dst, $src}", []>; } -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, imm:$src)]>; @@ -1507,7 +1507,7 @@ "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)]>; } -let isReMaterializable = 1 in { +let isReMaterializable = 1, isMoveImm = 1 in { def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), "movabs{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, relocImm:$src)]>; Index: llvm/test/CodeGen/AArch64/arm64-memset-inline.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -242,14 +242,15 @@ ret void } -; FIXME This could be better: x9 is a superset of w8's bit-pattern. +; FIXME This could be better: x8 is a superset of w8's bit-pattern. define void @memset_12_stack() { ; CHECK-LABEL: memset_12_stack: ; CHECK: mov w8, #-1431655766 -; CHECK-NEXT: mov x9, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str w8, [sp, #8] -; CHECK-NEXT: str x9, [sp] +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x0, sp + +; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl something %buf = alloca [12 x i8], align 1 %cast = bitcast [12 x i8]* %buf to i8* @@ -272,14 +273,14 @@ ret void } -; FIXME This could be better: x9 is a superset of w8's bit-pattern. +; FIXME This could be better: x8 is a superset of w8's bit-pattern. define void @memset_20_stack() { ; CHECK-LABEL: memset_20_stack: ; CHECK: mov w8, #-1431655766 -; CHECK-NEXT: mov x9, #-6148914691236517206 -; CHECK-NEXT: add x0, sp, #8 ; CHECK-NEXT: str w8, [sp, #24] -; CHECK-NEXT: stp x9, x9, [sp, #8] +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: add x0, sp, #8 +; CHECK-NEXT: stp x8, x8, [sp, #8] ; CHECK-NEXT: bl something %buf = alloca [20 x i8], align 1 %cast = bitcast [20 x i8]* %buf to i8* @@ -288,15 +289,15 @@ ret void } -; FIXME This could be better: x9 is a superset of w8's bit-pattern. +; FIXME This could be better: x8 is a superset of w8's bit-pattern. 
define void @memset_26_stack() { ; CHECK-LABEL: memset_26_stack: ; CHECK: mov w8, #43690 -; CHECK-NEXT: mov x9, #-6148914691236517206 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: stp x9, x9, [sp, #8] -; CHECK-NEXT: str x9, [sp] +; CHECK-NEXT: mov x8, #-6148914691236517206 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: stp x8, x8, [sp, #8] +; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl something %buf = alloca [26 x i8], align 1 %cast = bitcast [26 x i8]* %buf to i8* @@ -320,8 +321,8 @@ define void @memset_40_stack() { ; CHECK-LABEL: memset_40_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: movi v0.16b, #170 +; CHECK: movi v0.16b, #170 +; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] @@ -349,8 +350,8 @@ define void @memset_72_stack() { ; CHECK-LABEL: memset_72_stack: -; CHECK: mov x8, #-6148914691236517206 -; CHECK-NEXT: movi v0.16b, #170 +; CHECK: movi v0.16b, #170 +; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] Index: llvm/test/CodeGen/AArch64/extract-bits.ll =================================================================== --- llvm/test/CodeGen/AArch64/extract-bits.ll +++ llvm/test/CodeGen/AArch64/extract-bits.ll @@ -439,8 +439,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: orr w9, wzr, #0x20 -; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: mov w10, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr w8, w8, w1 ; CHECK-NEXT: lsr w9, w10, w9 @@ -533,8 +533,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: orr w9, wzr, #0x40 -; CHECK-NEXT: mov x10, #-1 ; CHECK-NEXT: sub w9, w9, w2 +; CHECK-NEXT: mov x10, #-1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr x8, x8, x1 ; CHECK-NEXT: lsr x9, x10, x9 Index: llvm/test/CodeGen/AArch64/funnel-shift.ll =================================================================== --- llvm/test/CodeGen/AArch64/funnel-shift.ll +++ llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -41,8 +41,8 @@ ; CHECK-NEXT: and x9, x2, #0x1fffffffff ; CHECK-NEXT: movk x10, #56679, lsl #48 ; CHECK-NEXT: umulh x10, x9, x10 -; CHECK-NEXT: mov w11, #37 ; CHECK-NEXT: lsr x10, x10, #5 +; CHECK-NEXT: mov w11, #37 ; CHECK-NEXT: msub x9, x10, x11, x9 ; CHECK-NEXT: and x8, x1, #0x1fffffffff ; CHECK-NEXT: sub x11, x11, x9 @@ -169,8 +169,8 @@ ; CHECK-NEXT: and x9, x2, #0x1fffffffff ; CHECK-NEXT: movk x10, #56679, lsl #48 ; CHECK-NEXT: umulh x10, x9, x10 -; CHECK-NEXT: mov w11, #37 ; CHECK-NEXT: lsr x10, x10, #5 +; CHECK-NEXT: mov w11, #37 ; CHECK-NEXT: msub x9, x10, x11, x9 ; CHECK-NEXT: and x8, x1, #0x1fffffffff ; CHECK-NEXT: sub x10, x11, x9 Index: llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll +++ llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll @@ -9,7 +9,7 @@ ; CHECK-DAG: orr [[REG2:x[0-9]+]], xzr, #0x2 ; CHECK-DAG: orr [[REG3:x[0-9]+]], xzr, #0x3 ; CHECK-DAG: cmp x0, #0 -; CHECK: csel w[[SELECT_WREG_1:[0-9]+]], wzr, [[REG1]], ne +; CHECK-DAG: csel w[[SELECT_WREG_1:[0-9]+]], wzr, [[REG1]], ne ; CHECK-DAG: csel [[SELECT_XREG_2:x[0-9]+]], [[REG2]], [[REG3]], ne ; CHECK: ubfx [[SELECT_XREG_1:x[0-9]+]], x[[SELECT_WREG_1]], #0, #32 ; CHECK-NEXT: add x0, [[SELECT_XREG_2]], [[SELECT_XREG_1]] Index: llvm/test/CodeGen/AArch64/sat-add.ll 
=================================================================== --- llvm/test/CodeGen/AArch64/sat-add.ll +++ llvm/test/CodeGen/AArch64/sat-add.ll @@ -490,10 +490,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-43 ; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: mov w9, #42 ; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d +; CHECK-NEXT: mov w8, #42 ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v0.2d, x9 +; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: add v0.2d, v2.2d, v0.2d ; CHECK-NEXT: ret %c = icmp ult <2 x i64> %x, @@ -523,9 +523,9 @@ ; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: mov x9, #-43 ; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: dup v2.2d, x9 +; CHECK-NEXT: mov x8, #-43 +; CHECK-NEXT: dup v2.2d, x8 ; CHECK-NEXT: add v1.2d, v0.2d, v1.2d ; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d ; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b Index: llvm/test/CodeGen/AArch64/tail-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/tail-call.ll +++ llvm/test/CodeGen/AArch64/tail-call.ll @@ -59,7 +59,10 @@ ; callee will not deallocate the space, even in fastcc. tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! +; FIXME: We should notice that these stores can be paired (stp). + +; CHECK: str {{x[0-9]+}}, [sp, #24] +; CHECK: str {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack16 ret void } Index: llvm/test/CodeGen/AArch64/urem-seteq-optsize.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-optsize.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-optsize.ll @@ -10,10 +10,10 @@ ; CHECK-NEXT: mov w8, #5 ; CHECK-NEXT: udiv w8, w0, w8 ; CHECK-NEXT: add w8, w8, w8, lsl #2 -; CHECK-NEXT: mov w9, #-10 ; CHECK-NEXT: cmp w0, w8 -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: csel w0, w8, w9, eq +; CHECK-NEXT: mov w8, #-10 +; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret %rem = urem i32 %X, 5 %cmp = icmp eq i32 %rem, 0 @@ -29,10 +29,10 @@ ; CHECK-NEXT: umull x8, w0, w8 ; CHECK-NEXT: lsr x8, x8, #34 ; CHECK-NEXT: add w8, w8, w8, lsl #2 -; CHECK-NEXT: mov w9, #-10 ; CHECK-NEXT: cmp w0, w8 -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: csel w0, w8, w9, eq +; CHECK-NEXT: mov w8, #-10 +; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret %rem = urem i32 %X, 5 %cmp = icmp eq i32 %rem, 0 Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -124,11 +124,11 @@ define <4 x i32> @test_urem_comp(<4 x i32> %X) nounwind readnone { ; CHECK-LABEL: test_urem_comp: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: adrp x9, .LCPI4_0 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI4_0] +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: umull2 v4.2d, v0.4s, v2.4s ; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s ; CHECK-NEXT: uzp2 v2.4s, v2.4s, v4.4s Index: llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ 
llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -29,33 +29,33 @@ define <4 x i16> @test_urem_odd_vec_i16(<4 x i16> %X) nounwind readnone { ; CHECK-LABEL: test_urem_odd_vec_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: mov w11, #52429 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: movk w9, #52428, lsl #16 -; CHECK-NEXT: umull x12, w8, w9 +; CHECK-NEXT: movk w11, #52428, lsl #16 +; CHECK-NEXT: umull x12, w8, w11 ; CHECK-NEXT: lsr x12, x12, #34 -; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: add w12, w12, w12, lsl #2 ; CHECK-NEXT: sub w8, w8, w12 -; CHECK-NEXT: umull x12, w10, w9 +; CHECK-NEXT: umull x12, w9, w11 ; CHECK-NEXT: lsr x12, x12, #34 -; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: umov w10, v0.h[2] ; CHECK-NEXT: add w12, w12, w12, lsl #2 -; CHECK-NEXT: sub w10, w10, w12 -; CHECK-NEXT: umull x12, w11, w9 +; CHECK-NEXT: sub w9, w9, w12 +; CHECK-NEXT: umull x12, w10, w11 ; CHECK-NEXT: lsr x12, x12, #34 ; CHECK-NEXT: add w12, w12, w12, lsl #2 -; CHECK-NEXT: sub w11, w11, w12 +; CHECK-NEXT: sub w10, w10, w12 ; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: umull x9, w12, w9 -; CHECK-NEXT: lsr x9, x9, #34 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: add w9, w9, w9, lsl #2 +; CHECK-NEXT: umull x11, w12, w11 +; CHECK-NEXT: lsr x11, x11, #34 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: add w11, w11, w11, lsl #2 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: sub w9, w12, w9 -; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: mov v0.h[3], w9 +; CHECK-NEXT: sub w11, w12, w11 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: cmeq v0.4h, v0.4h, #0 ; CHECK-NEXT: movi v1.4h, #1 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b @@ -100,32 +100,32 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov w9, #9363 -; CHECK-NEXT: movk w9, #37449, lsl #16 -; CHECK-NEXT: umov w10, v0.h[0] -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: ubfx w13, w8, #1, #15 +; CHECK-NEXT: mov w16, #9363 +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: umov w10, v0.h[2] +; CHECK-NEXT: umov w11, v0.h[3] +; CHECK-NEXT: ubfx w12, w8, #1, #15 +; CHECK-NEXT: movk w16, #37449, lsl #16 +; CHECK-NEXT: ubfx w13, w9, #1, #15 ; CHECK-NEXT: ubfx w14, w10, #1, #15 ; CHECK-NEXT: ubfx w15, w11, #1, #15 -; CHECK-NEXT: ubfx w16, w12, #1, #15 -; CHECK-NEXT: umull x13, w13, w9 -; CHECK-NEXT: umull x14, w14, w9 -; CHECK-NEXT: umull x15, w15, w9 -; CHECK-NEXT: umull x9, w16, w9 +; CHECK-NEXT: umull x12, w12, w16 +; CHECK-NEXT: umull x13, w13, w16 +; CHECK-NEXT: umull x14, w14, w16 +; CHECK-NEXT: umull x15, w15, w16 +; CHECK-NEXT: lsr x12, x12, #34 ; CHECK-NEXT: orr w16, wzr, #0xe -; CHECK-NEXT: lsr x13, x13, #34 -; CHECK-NEXT: msub w8, w13, w16, w8 -; CHECK-NEXT: lsr x13, x14, #34 -; CHECK-NEXT: msub w10, w13, w16, w10 -; CHECK-NEXT: lsr x13, x15, #34 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: msub w11, w13, w16, w11 -; CHECK-NEXT: lsr x9, x9, #34 +; CHECK-NEXT: msub w8, w12, w16, w8 +; CHECK-NEXT: lsr x12, x13, #34 +; CHECK-NEXT: msub w9, w12, w16, w9 +; CHECK-NEXT: lsr x12, x14, #34 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: msub w10, w12, w16, w10 +; CHECK-NEXT: lsr x12, x15, #34 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w9, w9, w16, w12 -; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: mov v0.h[3], w9 +; CHECK-NEXT: msub w11, w12, w16, w11 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: cmeq v0.4h, 
v0.4h, #0 ; CHECK-NEXT: movi v1.4h, #1 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b Index: llvm/test/CodeGen/AMDGPU/add.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/add.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/add.v2i16.ll @@ -203,8 +203,8 @@ ; GFX9: buffer_store_dwordx4 ; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI: flat_load_dword v[[A:[0-9]+]] -; VI: flat_load_dword v[[B:[0-9]+]] +; VI-DAG: flat_load_dword v[[A:[0-9]+]] +; VI-DAG: flat_load_dword v[[B:[0-9]+]] ; VI-DAG: v_add_u16_e32 ; VI: v_add_u16_sdwa v[[ADD_HI:[0-9]+]], v[[A]], v[[B]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 Index: llvm/test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -194,9 +194,9 @@ } ; HSA-LABEL: {{^}}cast_neg1_group_to_flat_addrspacecast: -; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} -; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} -; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} +; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32* @@ -221,7 +221,7 @@ ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} -; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} +; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(5)* null to i32* Index: llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -232,9 +232,9 @@ ; SI-ALLOCA: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; SI-PROMOTE-VECT: s_load_dword [[IDX:s[0-9]+]] -; SI-PROMOTE-VECT: s_mov_b32 [[SREG:s[0-9]+]], 0x10000 ; SI-PROMOTE-VECT: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 4 ; SI-PROMOTE-VECT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SCALED_IDX]] +; SI-PROMOTE-VECT: s_mov_b32 [[SREG:s[0-9]+]], 0x10000 ; SI-PROMOTE-VECT: v_bfe_u32 v{{[0-9]+}}, [[SREG]], [[VREG]], 16 define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 { entry: Index: llvm/test/CodeGen/AMDGPU/bswap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/bswap.ll +++ llvm/test/CodeGen/AMDGPU/bswap.ll @@ -11,7 +11,7 @@ declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone ; FUNC-LABEL: @test_bswap_i32 -; GCN: s_load_dword [[VAL:s[0-9]+]] +; GCN-DAG: s_load_dword [[VAL:s[0-9]+]] ; GCN-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8 ; GCN-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24 ; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff Index: llvm/test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -126,7 +126,7 @@ ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 
s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+4 -; GCN-NEXT: v_mov_b32_e32 v0, 0x7b +; GCN-DAG: v_mov_b32_e32 v0, 0x7b ; HSA-DAG: s_mov_b32 s4, s33{{$}} ; GCN-DAG: s_mov_b32 s32, s33{{$}} @@ -144,9 +144,9 @@ ; MESA-DAG: s_mov_b32 s33, s3{{$}} ; GCN-DAG: buffer_load_sbyte v0 -; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4 +; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4 +; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4 ; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s32, s3 @@ -165,9 +165,9 @@ ; HSA-DAG: s_mov_b32 s33, s9{{$}} ; GCN-DAG: buffer_load_ubyte v0 -; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+4 +; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4 +; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+4 ; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s32, s33 @@ -197,9 +197,9 @@ ; MESA-DAG: s_mov_b32 s33, s3{{$}} ; GCN-DAG: buffer_load_sshort v0 -; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+4 +; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4 +; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+4 ; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s32, s33 @@ -218,9 +218,9 @@ ; GCN-DAG: buffer_load_ushort v0 -; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+4 +; GCN-DAG: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4 +; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+4 ; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s32, s33 @@ -240,7 +240,7 @@ ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+4 -; GCN: v_mov_b32_e32 v0, 42 +; GCN-DAG: v_mov_b32_e32 v0, 42 ; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s32, s33 @@ -688,7 +688,7 @@ ; GCN-NOT: s_add_u32 [[SP]] ; GCN-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 ; GCN-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 -; GCN-NEXT: s_swappc_b64 +; GCN: s_swappc_b64 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16 ; 
GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20 ; GCN-NOT: s_sub_u32 [[SP]] Index: llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -383,13 +383,12 @@ ; GCN: enable_sgpr_workgroup_id_y = 0 ; GCN: enable_sgpr_workgroup_id_z = 0 +; GCN-NOT: s6 ; GCN-DAG: s_mov_b32 s33, s7 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b - -; GCN-NOT: s6 -; GCN: s_mov_b32 s4, s33 -; GCN-NOT: s6 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s32, s33 +; GCN-NOT: s6 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 { call void @other_arg_use_workgroup_id_x(i32 555) @@ -577,16 +576,16 @@ ; GCN: s_swappc_b64 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4 -; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]] -; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]] -; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}} -; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]] -; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]] -; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}} -; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]] -; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]] -; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4 +; GCN-DAG: v_mov_b32_e32 v[[LO1:[0-9]+]], s[[LO_X]] +; GCN-DAG: v_mov_b32_e32 v[[HI1:[0-9]+]], s[[HI_X]] +; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO1]]:[[HI1]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[LO2:[0-9]+]], s[[LO_Y]] +; GCN-DAG: v_mov_b32_e32 v[[HI2:[0-9]+]], s[[HI_Y]] +; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO2]]:[[HI2]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[LO3:[0-9]+]], s[[LO_Z]] +; GCN-DAG: v_mov_b32_e32 v[[HI3:[0-9]+]], s[[HI_Z]] +; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO3]]:[[HI3]]{{\]}} ; GCN: ; use ; GCN: ; use [[SAVE_X]] ; GCN: ; use [[SAVE_Y]] Index: llvm/test/CodeGen/AMDGPU/calling-conventions.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/calling-conventions.ll +++ llvm/test/CodeGen/AMDGPU/calling-conventions.ll @@ -123,11 +123,11 @@ ; FIXME: Inconsistent ABI between targets ; GCN-LABEL: {{^}}ps_mesa_v2f16: -; VI: v_mov_b32_e32 v1, 0x3c00 -; VI-NEXT: v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_add_f16_e32 v0, 1.0, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: ; return +; VI: v_mov_b32_e32 v2, 0x3c00 +; VI-NEXT: v_add_f16_e32 v1, 1.0, v0 +; VI-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v1, v0 +; VI: ; return ; SI-DAG: v_cvt_f16_f32_e32 [[CVT_ELT0:v[0-9]+]], v0 ; SI-DAG: v_cvt_f16_f32_e32 [[CVT_ELT1:v[0-9]+]], v1 Index: llvm/test/CodeGen/AMDGPU/captured-frame-index.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/captured-frame-index.ll +++ llvm/test/CodeGen/AMDGPU/captured-frame-index.ll @@ -14,9 +14,9 @@ ; GCN-LABEL: {{^}}stored_fi_to_lds: ; GCN: s_load_dword [[LDSPTR:s[0-9]+]] -; GCN: buffer_store_dword v{{[0-9]+}}, off, -; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}} -; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]] +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, 
off, +; GCN-DAG: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 4{{$}} +; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]] ; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]] define amdgpu_kernel void @stored_fi_to_lds(float addrspace(5)* addrspace(3)* %ptr) #0 { %tmp = alloca float, addrspace(5) Index: llvm/test/CodeGen/AMDGPU/ctpop64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ctpop64.ll +++ llvm/test/CodeGen/AMDGPU/ctpop64.ll @@ -45,11 +45,11 @@ ; FUNC-LABEL: {{^}}v_ctpop_i64_user: ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 -; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] +; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; SI-DAG: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-DAG: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}} +; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { Index: llvm/test/CodeGen/AMDGPU/ds_write2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -99,8 +99,8 @@ ; GCN-LABEL: {{^}}simple_write2_two_val_subreg2_mixed_f32: ; GFX9-NOT: m0 -; CI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} -; CI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} +; CI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}} +; CI-DAG: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}} ; CI-DAG: s_mov_b32 m0 ; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} @@ -338,10 +338,15 @@ ; CI-DAG: s_mov_b32 m0 ; GFX9-NOT: m0 -; GCN-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} -; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} -; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1 -; GCN: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 + +; GFX9-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} +; GFX9-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}} +; CI-DAG: v_lshlrev_b32_e32 [[VPTR0:v[0-9]+]], 3, v{{[0-9]+}} +; CI-DAG: v_add_{{.*}}32_e32 [[VPTR:v[0-9]+]], vcc, s{{[0-9]+}}, [[VPTR0]] +; CI-DAG: {{buffer|global}}_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 + +; GCN-DAG: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1 +; GCN-DAG: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 ; GCN: s_endpgm define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 Index: llvm/test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ds_write2st64.ll +++ llvm/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -63,7 +63,7 @@ ; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, 
off offset:4 ; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v{{[0-9]+}} -; GCN: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]] +; GCN-DAG: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]] ; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 ; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { @@ -91,7 +91,7 @@ ; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8 ; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 3, v{{[0-9]+}} -; GCN: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]] +; GCN-DAG: v_add_{{i|u}}32_e32 [[VPTR:v[0-9]+]], {{(vcc, )?}}s{{[0-9]+}}, [[SHL]] ; GCN: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 ; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { Index: llvm/test/CodeGen/AMDGPU/fabs.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fabs.f16.ll +++ llvm/test/CodeGen/AMDGPU/fabs.f16.ll @@ -137,8 +137,8 @@ ; CI: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CI: v_cvt_f16_f32 -; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, s{{[0-9]+}} +; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, s{{[0-9]+}} ; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]] ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[FABS]], s{{[0-9]+$}} Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -633,7 +633,7 @@ ; GFX9: s_waitcnt ; GFX9-DAG: v_max_f16_e32 v0, v0, v0 ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4000 -; GFX9: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-DAG: v_and_b32_e32 v0, 0xffff, v0 ; GFX9: v_lshl_or_b32 v0, [[K]], 16, v0 ; GFX9: s_setpc_b64 @@ -666,8 +666,8 @@ } ; GCN-LABEL: {{^}}s_test_canonicalize_undef_v4f16: -; GCN: v_mov_b32_e32 v0, 0x7e007e00 -; GCN: v_mov_b32_e32 v1, v0 +; GCN-DAG: v_mov_b32_e32 v[[R:[0-9]+]], 0x7e007e00 +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, v[[R]] define amdgpu_kernel void @s_test_canonicalize_undef_v4f16(<4 x half> addrspace(1)* %out) #1 { %canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef) store <4 x half> %canonicalized, <4 x half> addrspace(1)* %out Index: llvm/test/CodeGen/AMDGPU/fexp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fexp.ll +++ llvm/test/CodeGen/AMDGPU/fexp.ll @@ -4,26 +4,12 @@ ;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s define float @v_exp_f32(float %arg0) { -; SI-LABEL: v_exp_f32: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_exp_f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; VI-NEXT: v_exp_f32_e32 v0, v0 -; VI-NEXT: s_setpc_b64 
s[30:31] -; -; GFX9-LABEL: v_exp_f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 -; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_exp_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.exp.f32(float %arg0) ret float %result } @@ -32,9 +18,9 @@ ; GCN-LABEL: v_exp_v2f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b -; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} -; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} +; GCN-NEXT: s_mov_b32 s6, 0x3fb8aa3b +; GCN-NEXT: v_mul_f32_e32 v0, s6, v0 +; GCN-NEXT: v_mul_f32_e32 v1, s6, v1 ; GCN-NEXT: v_exp_f32_e32 v0, v0 ; GCN-NEXT: v_exp_f32_e32 v1, v1 ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -46,33 +32,32 @@ ; GCN-LABEL: v_exp_v3f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b -; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} -; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} -; GCN-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} +; GCN-NEXT: s_mov_b32 s6, 0x3fb8aa3b +; GCN-NEXT: v_mul_f32_e32 v0, s6, v0 +; GCN-NEXT: v_mul_f32_e32 v1, s6, v1 +; GCN-NEXT: v_mul_f32_e32 v2, s6, v2 ; GCN-NEXT: v_exp_f32_e32 v0, v0 ; GCN-NEXT: v_exp_f32_e32 v1, v1 ; GCN-NEXT: v_exp_f32_e32 v2, v2 ; GCN-NEXT: s_setpc_b64 s[30:31] -; %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0) ret <3 x float> %result } define <4 x float> @v_exp_v4f32(<4 x float> %arg0) { -; SI-LABEL: v_exp_v4f32: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b -; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0 -; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1 -; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2 -; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3 -; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: v_exp_f32_e32 v1, v1 -; SI-NEXT: v_exp_f32_e32 v2, v2 -; SI-NEXT: v_exp_f32_e32 v3, v3 -; SI-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_exp_v4f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s6, 0x3fb8aa3b +; GCN-NEXT: v_mul_f32_e32 v0, s6, v0 +; GCN-NEXT: v_mul_f32_e32 v1, s6, v1 +; GCN-NEXT: v_mul_f32_e32 v2, s6, v2 +; GCN-NEXT: v_mul_f32_e32 v3, s6, v3 +; GCN-NEXT: v_exp_f32_e32 v0, v0 +; GCN-NEXT: v_exp_f32_e32 v1, v1 +; GCN-NEXT: v_exp_f32_e32 v2, v2 +; GCN-NEXT: v_exp_f32_e32 v3, v3 +; GCN-NEXT: s_setpc_b64 s[30:31] %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0) ret <4 x float> %result } @@ -110,11 +95,11 @@ ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} -; SI-NEXT: v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}} +; SI-NEXT: s_mov_b32 s6, 0x3fb8aa3b +; SI-NEXT: v_mul_f32_e32 v0, s6, v0 +; SI-NEXT: v_mul_f32_e32 v1, s6, v1 ; SI-NEXT: v_exp_f32_e32 v0, v0 ; SI-NEXT: v_exp_f32_e32 v1, v1 ; SI-NEXT: s_setpc_b64 s[30:31] @@ -122,20 +107,20 @@ ; VI-LABEL: v_exp_v2f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_movk_i32 
[[SREG:s[0-9]+]], 0x3dc5 -; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]] -; VI-NEXT: v_mul_f16_sdwa [[MUL1:v[0-9]+]], v{{[0-9]+}}, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v{{[0-9]+}} -; VI-NEXT: v_exp_f16_sdwa [[MUL1]], [[MUL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_exp_f16_e32 [[MUL2]], [[MUL2]] -; VI-NEXT: v_or_b32_e32 v{{[0-9]+}}, [[MUL2]], [[MUL1]] +; VI-NEXT: s_movk_i32 s6, 0x3dc5 +; VI-NEXT: v_mov_b32_e32 v2, s6 +; VI-NEXT: v_mul_f16_e32 v1, s6, v0 +; VI-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_exp_f16_e32 v1, v1 +; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_or_b32_e32 v0, v1, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_exp_v2f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5 -; GFX9-NEXT: v_pk_mul_f16 v0, v0, [[SREG]] op_sel_hi:[1,0] +; GFX9-NEXT: s_movk_i32 s6, 0x3dc5 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, s6 op_sel_hi:[1,0] ; GFX9-NEXT: v_exp_f16_e32 v1, v0 ; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 @@ -158,15 +143,15 @@ ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b ; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 ; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; SI-NEXT: v_mul_f32_e32 v0, [[SREG]], v0 -; SI-NEXT: v_mul_f32_e32 v1, [[SREG]], v1 -; SI-NEXT: v_mul_f32_e32 v2, [[SREG]], v2 -; SI-NEXT: v_mul_f32_e32 v3, [[SREG]], v3 +; SI-NEXT: s_mov_b32 s6, 0x3fb8aa3b +; SI-NEXT: v_mul_f32_e32 v0, s6, v0 +; SI-NEXT: v_mul_f32_e32 v1, s6, v1 +; SI-NEXT: v_mul_f32_e32 v2, s6, v2 +; SI-NEXT: v_mul_f32_e32 v3, s6, v3 ; SI-NEXT: v_exp_f32_e32 v0, v0 ; SI-NEXT: v_exp_f32_e32 v1, v1 ; SI-NEXT: v_exp_f32_e32 v2, v2 @@ -176,37 +161,37 @@ ; VI-LABEL: v_exp_v4f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5 -; VI-NEXT: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]] -; VI-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1 -; VI-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0 -; VI-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]] -; VI-NEXT: v_exp_f16_sdwa [[EXP2:v[0-9]+]], v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]] -; VI-NEXT: v_exp_f16_sdwa [[EXP4:v[0-9]+]], v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_or_b32_e32 v1, [[EXP1]], [[EXP2]] -; VI-NEXT: v_or_b32_e32 v0, [[EXP3]], [[EXP4]] +; VI-NEXT: s_movk_i32 s6, 0x3dc5 +; VI-NEXT: v_mov_b32_e32 v3, s6 +; VI-NEXT: v_mul_f16_e32 v2, s6, v1 +; VI-NEXT: v_mul_f16_e32 v4, s6, v0 +; VI-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_exp_f16_e32 v2, v2 +; VI-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:DWORD +; VI-NEXT: v_exp_f16_e32 v4, v4 +; VI-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_or_b32_e32 v0, v4, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_exp_v4f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5 -; GFX9-NEXT: v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1 -; GFX9-NEXT: v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0 -; GFX9-NEXT: v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]] -; GFX9-NEXT: v_exp_f16_e32 [[EXP2:v[0-9]+]], [[MUL2]] -; GFX9-NEXT: v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL4]] -; GFX9-NEXT: v_exp_f16_e32 [[EXP4:v[0-9]+]], [[MUL3]] -; GFX9-NEXT: v_mov_b32_e32 [[VCONST:v[0-9]+]], 0xffff -; GFX9-NEXT: v_and_b32_e32 [[AND1:v[0-9]+]], [[VCONST]], [[EXP2]] -; GFX9-NEXT: v_and_b32_e32 [[AND2:v[0-9]+]], [[VCONST]], [[EXP1]] -; GFX9-NEXT: v_lshl_or_b32 v0, [[EXP3]], 16, [[AND1]] -; GFX9-NEXT: v_lshl_or_b32 v1, [[EXP4]], 16, [[AND2]] +; GFX9-NEXT: s_movk_i32 s6, 0x3dc5 +; GFX9-NEXT: v_mul_f16_e32 v2, s6, v1 +; GFX9-NEXT: v_mul_f16_e32 v3, s6, v0 +; GFX9-NEXT: v_mul_f16_sdwa v1, v1, s6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_mul_f16_sdwa v0, v0, s6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_exp_f16_e32 v2, v2 +; GFX9-NEXT: v_exp_f16_e32 v3, v3 +; GFX9-NEXT: v_exp_f16_e32 v0, v0 +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NEXT: v_and_b32_e32 v3, v4, v3 +; GFX9-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v3 +; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0) ret <4 x half> %result Index: llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll +++ llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll @@ -8,22 +8,20 @@ ; SI-LABEL: test_fmax_legacy_uge_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmax_legacy_uge_f64: @@ -60,22 +58,20 @@ ; SI-LABEL: test_fmax_legacy_oge_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 
s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmax_legacy_oge_f64: @@ -112,22 +108,20 @@ ; SI-LABEL: test_fmax_legacy_ugt_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmax_legacy_ugt_f64: @@ -164,22 +158,20 @@ ; SI-LABEL: test_fmax_legacy_ogt_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmax_legacy_ogt_f64: Index: llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll +++ llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll @@ -6,22 +6,20 @@ ; SI-LABEL: test_fmin_legacy_uge_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; 
SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_nlt_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_uge_f64: @@ -58,22 +56,20 @@ ; SI-LABEL: test_fmin_legacy_ugt_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_nle_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_ugt_f64: @@ -110,22 +106,20 @@ ; SI-LABEL: test_fmin_legacy_ule_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ngt_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_ule_f64: @@ -162,22 +156,20 @@ ; SI-LABEL: test_fmin_legacy_ult_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 
s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_nge_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_ult_f64: @@ -214,22 +206,20 @@ ; SI-LABEL: test_fmin_legacy_oge_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_ge_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_oge_f64: @@ -266,22 +256,20 @@ ; SI-LABEL: test_fmin_legacy_ogt_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_gt_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_ogt_f64: @@ -318,22 +306,20 @@ ; SI-LABEL: test_fmin_legacy_ole_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 
0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_le_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_ole_f64: @@ -370,22 +356,20 @@ ; SI-LABEL: test_fmin_legacy_olt_f64: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s10, 0 -; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b64 s[8:9], s[2:3] +; SI-NEXT: s_mov_b32 s6, 0 ; SI-NEXT: v_mov_b32_e32 v1, 0 -; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b64 s[4:5], s[2:3] +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s7, s3 +; SI-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cmp_lt_f64_e32 vcc, v[0:1], v[2:3] ; SI-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc ; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: test_fmin_legacy_olt_f64: Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -157,13 +157,12 @@ } ; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32: -; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}} -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] - -; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]] -; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] -; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]] +; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]] +; GCN-SAFE-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}} +; GCN-SAFE-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]] +; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] +; GCN-SAFE-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]] ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] ; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]] Index: llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -117,8 +117,8 @@ ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} -; VI: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0 -; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0 +; VI-DAG: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] Index: llvm/test/CodeGen/AMDGPU/fp-classify.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/fp-classify.ll +++ llvm/test/CodeGen/AMDGPU/fp-classify.ll @@ -165,11 +165,11 @@ ; SI-DAG: v_cmp_class_f32_e64 [[CLASS:s\[[0-9]+:[0-9]+\]]], [[X]], [[K]] ; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CLASS]] -; VI-DAG: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[X]], [[VY]] -; VI-DAG: v_cmp_class_f32_e32 vcc, [[X]], [[K]] -; VI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP]], vcc +; VI-DAG: v_cmp_o_f32_e32 vcc, [[X]], [[VY]] +; VI-DAG: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[X]], [[K]] +; VI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP]] -; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[AND]] +; GCN-DAG: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[AND]] define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(i32 addrspace(1)* nocapture %out, float %x, [8 x i32], float %y) #0 { %ord = fcmp ord float %x, %y %x.fabs = tail call float @llvm.fabs.f32(float %x) #1 Index: llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -131,19 +131,19 @@ ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: ; GCN: s_sub_u32 [[SUB_OFFSET:s[0-9]+]], s5, s4 -; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 -; CI: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] +; CI-DAG: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], [[SUB_OFFSET]], 6 +; CI-DAG: v_add_i32_e64 [[ADD:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]] ; GFX9: v_lshrrev_b32_e64 [[SHIFT:v[0-9]+]], 6, [[SUB_OFFSET]] ; GFX9: v_add_u32_e32 [[ADD:v[0-9]+]], 4, [[SHIFT]] -; GCN: s_and_saveexec_b64 +; GCN-DAG: s_and_saveexec_b64 -; CI: v_add_i32_e32 v0, vcc, 4, [[ADD]] -; CI: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4{{$}} +; CI-DAG: v_add_i32_e32 v{{[0-9]+}}, vcc, 4, [[ADD]] +; CI-DAG: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}} -; GFX9: v_add_u32_e32 v0, 4, [[ADD]] -; GFX9: buffer_load_dword v1, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}} +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 4, [[ADD]] +; GFX9: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen offset:4{{$}} ; GCN: ds_write_b32 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 } addrspace(5)* byval %arg0, i32 %arg2) #0 { @@ -163,17 +163,17 @@ ; Added offset can't be used with VOP3 add ; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32: -; GCN: s_sub_u32 s6, s5, s4 +; GCN-DAG: s_sub_u32 s6, s5, s4 ; GCN-DAG: s_movk_i32 s6, 0x204 ; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], s6, 6 -; CI: v_add_i32_e64 v0, s[6:7], s6, [[SCALED]] +; CI-DAG: v_add_i32_e64 v{{[0-9]+}}, s[6:7], s6, [[SCALED]] ; GFX9-DAG: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s6 ; GFX9: v_add_u32_e32 v0, s6, [[SCALED]] -; GCN: v_mul_lo_i32 v0, v0, 9 -; GCN: ds_write_b32 v0, v0 +; GCN-DAG: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, 9 +; GCN-DAG: ds_write_b32 v0, v{{[0-9]+}} define void @func_other_fi_user_non_inline_imm_offset_i32() #0 { %alloca0 = alloca [128 x i32], align 4, addrspace(5) %alloca1 = alloca [8 x i32], align 4, addrspace(5) Index: llvm/test/CodeGen/AMDGPU/function-returns.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/function-returns.ll +++ llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -320,8 +320,8 @@ ; FIXME: Mixing buffer and global ; FIXME: Should not scalarize ; 
GCN-LABEL: {{^}}v5i16_func_void: -; GFX9: buffer_load_dwordx2 v[0:1] -; GFX9-NEXT: global_load_short_d16 v2 +; GFX9-DAG: buffer_load_dwordx2 v[0:1] +; GFX9-DAG: global_load_short_d16 v2 ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 define <5 x i16> @v5i16_func_void() #0 { Index: llvm/test/CodeGen/AMDGPU/global_smrd.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/global_smrd.ll +++ llvm/test/CodeGen/AMDGPU/global_smrd.ll @@ -100,9 +100,11 @@ ; uniform load from global array dominated by alias store ; CHECK-LABEL: @global_array_alias_store -; CHECK: flat_store_dword -; CHECK: v_mov_b32_e32 v[[ADDR_LO:[0-9]+]], s{{[0-9]+}} -; CHECK: v_mov_b32_e32 v[[ADDR_HI:[0-9]+]], s{{[0-9]+}} +; CHECK: s_add_u32 s[[AL:[0-9]+]], s{{[0-9]+}}, A@gotpcrel32@lo+4 +; CHECK: s_addc_u32 s[[AH:[0-9]+]], s{{[0-9]+}}, A@gotpcrel32@hi+4 +; CHECK: s_load_dwordx2 s{{\[}}[[S_ADDRL:[0-9]+]]:[[S_ADDRH:[0-9]+]]{{\]}}, s{{\[}}[[AL]]:[[AH]]{{\]}}, 0 +; CHECK-DAG: v_mov_b32_e32 v[[ADDR_LO:[0-9]+]], s[[S_ADDRL]] +; CHECK-DAG: v_mov_b32_e32 v[[ADDR_HI:[0-9]+]], s[[S_ADDRH]] ; CHECK: flat_load_dwordx2 [[A_ADDR:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} ; CHECK: flat_load_dword [[VVAL:v[0-9]+]], [[A_ADDR]] ; CHECK: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[VVAL]] Index: llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll +++ llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll @@ -14,7 +14,7 @@ } ; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_huge: -; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[FI:v[0-9]+]], 4 ; GCN-DAG: buffer_store_dword ; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] Index: llvm/test/CodeGen/AMDGPU/idot2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/idot2.ll +++ llvm/test/CodeGen/AMDGPU/idot2.ll @@ -12,23 +12,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v0, v1 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -36,23 +36,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -60,23 +60,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -129,23 +129,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; 
GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mul_u32_u24_e32 v0, s5, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, s6, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 +; GFX7-NEXT: s_mov_b32 s5, 0xffff +; GFX7-NEXT: s_and_b32 s2, s2, s5 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s5 +; GFX7-NEXT: v_mul_u32_u24_e32 v1, s3, v1 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -153,23 +153,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mul_u32_u24_e32 v0, s2, v0 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, s5, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: s_and_b32 s2, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mul_u32_u24_e32 v3, s0, v3 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, s4, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -177,23 +177,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mul_u32_u24_e32 v0, s2, v0 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_add_u32_e32 v2, s5, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mul_u32_u24_e32 v3, s0, v3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, 
v3 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, s4, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -201,23 +201,23 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mul_u32_u24_e32 v0, s2, v0 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-DL-NEXT: v_add_u32_e32 v2, s5, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-DL-NEXT: s_and_b32 s2, s2, s0 +; GFX9-DL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-DL-NEXT: v_mul_u32_u24_e32 v3, s0, v3 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-DL-NEXT: v_add_u32_e32 v2, s4, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, @@ -249,22 +249,22 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i16 s7, s4 -; GFX7-NEXT: s_ashr_i32 s4, s4, 16 -; GFX7-NEXT: s_sext_i32_i16 s8, s5 -; GFX7-NEXT: s_ashr_i32 s5, s5, 16 +; GFX7-NEXT: s_sext_i32_i16 s5, s2 +; GFX7-NEXT: s_ashr_i32 s2, s2, 16 +; GFX7-NEXT: s_sext_i32_i16 s6, s3 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_ashr_i32 s3, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_i32_i24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v2, s5 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v2, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -360,22 +360,22 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: 
s_lshr_b32 s8, s5, 16 -; GFX7-NEXT: s_sext_i32_i16 s4, s4 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v0, v1 -; GFX7-NEXT: s_sext_i32_i16 s5, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_i32_i24 v0, s5, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: s_sext_i32_i16 s2, s2 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_sext_i32_i16 s3, s3 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v2, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -477,23 +477,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v0, v1 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -501,23 +501,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ 
-525,23 +525,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -591,22 +591,22 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i16 s7, s4 -; GFX7-NEXT: s_ashr_i32 s4, s4, 16 -; GFX7-NEXT: s_and_b32 s8, s5, 0xffff -; GFX7-NEXT: s_ashr_i32 s5, s5, 16 +; GFX7-NEXT: s_sext_i32_i16 s5, s2 +; GFX7-NEXT: s_ashr_i32 s2, s2, 16 +; GFX7-NEXT: s_and_b32 s6, s3, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_ashr_i32 s3, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_i32_i24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v2, s5 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v2, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -708,18 +708,18 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s4, s4, 0xffff -; GFX7-NEXT: s_lshr_b32 s5, s5, 16 -; GFX7-NEXT: v_mov_b32_e32 v0, s6 -; GFX7-NEXT: v_mad_u32_u24 
v0, s5, s5, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, s4, v0 +; GFX7-NEXT: s_and_b32 s2, s2, 0xffff +; GFX7-NEXT: s_lshr_b32 s3, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, s3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, s2, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -809,23 +809,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_lshr_b32 s4, s4, 16 -; GFX7-NEXT: s_and_b32 s8, s5, s8 -; GFX7-NEXT: s_lshr_b32 s5, s5, 16 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -833,23 +833,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -857,23 +857,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-NODL-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -923,23 +923,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x1 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x1 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x1 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x1 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_lshr_b32 s4, s4, 16 -; GFX7-NEXT: s_and_b32 s8, s5, s8 -; GFX7-NEXT: s_lshr_b32 s5, s5, 16 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -947,23 +947,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x4 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x4 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x4 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x4 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 
+; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -971,23 +971,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x4 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x4 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x4 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x4 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1037,23 +1037,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX7-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s9, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: s_and_b32 s7, s7, s8 -; GFX7-NEXT: v_mov_b32_e32 v0, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX7-NEXT: s_and_b32 s6, s6, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v0, s6 +; GFX7-NEXT: s_mov_b32 s6, 0xffff +; GFX7-NEXT: s_and_b32 s3, s3, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_and_b32 s5, s5, s6 +; GFX7-NEXT: s_and_b32 s2, s2, s6 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s4, s4, s6 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1061,23 +1061,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s8, 0xffff ; 
GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX8-NEXT: s_load_dword s6, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s3, s8 -; GFX8-NEXT: s_and_b32 s2, s2, s8 -; GFX8-NEXT: s_and_b32 s5, s5, s8 -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s5, v1, v0 -; GFX8-NEXT: s_and_b32 s4, s4, s8 -; GFX8-NEXT: v_mov_b32_e32 v1, s2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s3, s3, s0 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s2, s4, s0 +; GFX8-NEXT: v_mov_b32_e32 v2, s6 +; GFX8-NEXT: s_and_b32 s0, s5, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1085,23 +1085,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s8, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NODL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GFX9-NODL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX9-NODL-NEXT: s_load_dword s6, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s3, s8 -; GFX9-NODL-NEXT: s_and_b32 s2, s2, s8 -; GFX9-NODL-NEXT: s_and_b32 s5, s5, s8 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s5, v1, v0 -; GFX9-NODL-NEXT: s_and_b32 s4, s4, s8 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v1, v0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s0 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s2, s4, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NODL-NEXT: s_and_b32 s0, s5, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1109,23 +1109,23 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s8, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GFX9-DL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX9-DL-NEXT: s_load_dword s6, s[0:1], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s3, s3, s8 -; GFX9-DL-NEXT: s_and_b32 s2, s2, s8 -; GFX9-DL-NEXT: s_and_b32 s5, s5, s8 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s5, v1, v0 -; GFX9-DL-NEXT: s_and_b32 s4, s4, s8 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v1, v0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_and_b32 s3, s3, s0 +; GFX9-DL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-DL-NEXT: s_and_b32 s2, s4, s0 +; GFX9-DL-NEXT: 
v_mov_b32_e32 v2, s6 +; GFX9-DL-NEXT: s_and_b32 s0, s5, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i16> addrspace(1)* %src2, @@ -1158,23 +1158,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX7-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s9, s[0:1], 0x0 +; GFX7-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s5, s5, s8 +; GFX7-NEXT: s_lshr_b32 s2, s2, 16 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_mov_b32 s2, 0xffff +; GFX7-NEXT: s_and_b32 s3, s3, s2 +; GFX7-NEXT: v_mov_b32_e32 v2, s3 +; GFX7-NEXT: v_mov_b32_e32 v0, s6 +; GFX7-NEXT: s_and_b32 s2, s5, s2 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v2, v0 ; GFX7-NEXT: s_lshr_b32 s4, s4, 16 -; GFX7-NEXT: s_and_b32 s7, s7, s8 -; GFX7-NEXT: v_mov_b32_e32 v0, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX7-NEXT: s_lshr_b32 s6, s6, 16 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1182,23 +1182,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s8, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX8-NEXT: s_load_dword s6, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s3, s8 -; GFX8-NEXT: s_lshr_b32 s2, s2, 16 -; GFX8-NEXT: s_and_b32 s5, s5, s8 -; GFX8-NEXT: v_mov_b32_e32 v0, s6 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s5, v1, v0 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v1, s2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: s_mov_b32 s2, 0xffff +; GFX8-NEXT: s_and_b32 s3, s3, s2 +; GFX8-NEXT: s_and_b32 s2, s5, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s3 +; GFX8-NEXT: v_mov_b32_e32 v3, s6 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v2, v3 +; GFX8-NEXT: s_lshr_b32 s1, s4, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1206,23 +1206,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s8, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NODL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GFX9-NODL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX9-NODL-NEXT: s_load_dword s6, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s3, s8 -; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 16 -; GFX9-NODL-NEXT: s_and_b32 s5, s5, s8 -; 
GFX9-NODL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s5, v1, v0 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v1, v0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s2 +; GFX9-NODL-NEXT: s_and_b32 s2, s5, s2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s6 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v2, v3 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s4, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1230,23 +1230,23 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s8, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 ; GFX9-DL-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 ; GFX9-DL-NEXT: s_load_dword s6, s[0:1], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s3, s3, s8 -; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 16 -; GFX9-DL-NEXT: s_and_b32 s5, s5, s8 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s5, v1, v0 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v1, v0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff +; GFX9-DL-NEXT: s_and_b32 s3, s3, s2 +; GFX9-DL-NEXT: s_and_b32 s2, s5, s2 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s6 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v2, v3 +; GFX9-DL-NEXT: s_lshr_b32 s1, s4, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i16> addrspace(1)* %src2, @@ -1279,23 +1279,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: 
s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1303,23 +1303,23 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s2, v1, v0 -; GFX8-NEXT: s_lshr_b32 s7, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v2, s7, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: s_and_b32 s2, s2, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v2, v3 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1327,23 +1327,23 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s2, v1, v0 -; GFX9-NODL-NEXT: s_lshr_b32 s7, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v2, v3 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1351,23 +1351,23 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 
16 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s2, v1, v0 -; GFX9-DL-NEXT: s_lshr_b32 s7, s4, 16 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s7, v1, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-DL-NEXT: s_and_b32 s2, s2, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v2, v3 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, @@ -1400,24 +1400,24 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v0, v1 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v1, s5, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mad_u32_u24 v1, s3, v1, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1425,24 +1425,24 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v1, s2, v1, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 
v2, s0 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v3, s0, v3, v2 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1450,24 +1450,24 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v1, s2, v1, v0 -; GFX9-NODL-NEXT: v_add_u32_e32 v2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v3, s0, v3, v2 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1475,24 +1475,24 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-DL-NEXT: v_mad_u32_u24 v1, s2, v1, v0 -; GFX9-DL-NEXT: v_add_u32_e32 v2, v1, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-DL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-DL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-DL-NEXT: v_mad_u32_u24 v3, s0, v3, v2 +; GFX9-DL-NEXT: 
v_add_u32_e32 v2, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, @@ -1527,23 +1527,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i16 s7, s4 -; GFX7-NEXT: s_ashr_i32 s4, s4, 16 -; GFX7-NEXT: s_sext_i32_i16 s8, s5 -; GFX7-NEXT: s_ashr_i32 s5, s5, 16 +; GFX7-NEXT: s_sext_i32_i16 s5, s2 +; GFX7-NEXT: s_ashr_i32 s2, s2, 16 +; GFX7-NEXT: s_sext_i32_i16 s6, s3 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_ashr_i32 s3, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_i32_i24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_i32_i24 v1, s8, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v2, s5 +; GFX7-NEXT: v_mad_i32_i24 v1, s6, v2, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1650,24 +1650,24 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: s_and_b32 s5, s5, s8 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v1, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v2, s7 -; GFX7-NEXT: v_mad_u32_u24 v1, s9, v2, v1 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v0, v1 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v2, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1675,24 +1675,24 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; 
GFX8-NEXT: v_mad_u32_u24 v0, s2, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v2, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v2, v0 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: s_mov_b32 s5, 0xffff ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: s_and_b32 s2, s2, s5 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: s_and_b32 s3, s3, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s2 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mad_u32_u24 v3, s3, v2, v3 +; GFX8-NEXT: v_mov_b32_e32 v4, s0 +; GFX8-NEXT: v_mad_u32_u24 v3, s1, v4, v3 +; GFX8-NEXT: v_mad_u32_u24 v2, s3, v2, v3 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1700,24 +1700,24 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s2, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v2, v0 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NODL-NEXT: s_mov_b32 s5, 0xffff ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s5 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s5 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: v_mad_u32_u24 v3, s3, v2, v3 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-NODL-NEXT: v_mad_u32_u24 v3, s1, v4, v3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v2, v3 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1725,24 +1725,24 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s2, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v2, s3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s4, v2, v0 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-DL-NEXT: s_mov_b32 s5, 
0xffff ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: s_and_b32 s2, s2, s5 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-DL-NEXT: s_and_b32 s3, s3, s5 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: v_mad_u32_u24 v3, s3, v2, v3 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s0 +; GFX9-DL-NEXT: v_mad_u32_u24 v3, s1, v4, v3 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s3, v2, v3 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, @@ -1778,23 +1778,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i16 s7, s4 -; GFX7-NEXT: s_sext_i32_i16 s8, s5 -; GFX7-NEXT: s_ashr_i32 s4, s4, 16 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_ashr_i32 s5, s5, 16 -; GFX7-NEXT: v_mad_i32_i24 v1, s8, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v2, s4 -; GFX7-NEXT: v_mad_i32_i24 v1, s5, v2, v1 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v0, v1 +; GFX7-NEXT: s_sext_i32_i16 s5, s2 +; GFX7-NEXT: s_sext_i32_i16 s6, s3 +; GFX7-NEXT: s_ashr_i32 s2, s2, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_ashr_i32 s3, s3, 16 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v2, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1902,24 +1902,24 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_lshr_b32 s9, s5, 16 -; GFX7-NEXT: v_mov_b32_e32 v0, s7 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v1, s9, v0, v1 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v0, v1 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 16 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_and_b32 s3, s3, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s3, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1927,24 +1927,24 @@ 
; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_lshr_b32 s3, s3, 16 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_lshr_b32 s4, s4, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s5 -; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s6 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mad_u32_u24 v3, s1, v2, v3 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1952,24 +1952,24 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: v_mad_u32_u24 v3, s1, v2, v3 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1977,24 +1977,24 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; 
GFX9-DL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 16 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s4, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v1, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: v_mad_u32_u24 v3, s1, v2, v3 +; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v2, v3 +; GFX9-DL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-DL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <2 x i16> addrspace(1)* %src2, @@ -2030,23 +2030,23 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i16 s7, s4 -; GFX7-NEXT: s_ashr_i32 s4, s4, 16 -; GFX7-NEXT: s_sext_i32_i16 s8, s5 -; GFX7-NEXT: s_ashr_i32 s5, s5, 16 +; GFX7-NEXT: s_sext_i32_i16 s5, s2 +; GFX7-NEXT: s_ashr_i32 s2, s2, 16 +; GFX7-NEXT: s_sext_i32_i16 s6, s3 +; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: s_ashr_i32 s3, s3, 16 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_i32_i24 v1, s5, v0, v1 -; GFX7-NEXT: v_mad_i32_i24 v0, s5, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s3, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v2, s5 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v2, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -2155,20 +2155,20 @@ ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_mov_b32 s8, 0xffff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_lshr_b32 s6, s4, 16 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: s_lshr_b32 s7, s5, 16 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: s_and_b32 s5, s5, s8 +; GFX7-NEXT: s_lshr_b32 s2, s5, 16 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v1 +; GFX7-NEXT: s_mov_b32 s6, 0xffff +; GFX7-NEXT: s_and_b32 s5, s5, s6 +; GFX7-NEXT: s_and_b32 s4, s4, s6 ; GFX7-NEXT: 
v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -2179,22 +2179,22 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_mov_b32 s0, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: s_lshr_b32 s2, s2, 16 -; GFX8-NEXT: s_lshr_b32 s1, s1, 16 -; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: s_lshr_b32 s1, s3, 16 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: s_mov_b32 s0, 0xffff +; GFX8-NEXT: s_and_b32 s1, s2, s0 +; GFX8-NEXT: s_and_b32 s0, s3, s0 ; GFX8-NEXT: v_mov_b32_e32 v3, s0 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -2203,22 +2203,22 @@ ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 -; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 -; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 16 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 16 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s0 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -2263,19 +2263,19 @@ ; GFX7-LABEL: notsdot2_sext8: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s10, s2 +; GFX7-NEXT: s_mov_b32 s11, s3 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_load_dword s12, s[0:1], 0x0 ; GFX7-NEXT: s_mov_b32 s8, s6 ; GFX7-NEXT: s_mov_b32 s9, s7 -; GFX7-NEXT: s_mov_b32 s11, s3 ; GFX7-NEXT: s_mov_b32 s6, s2 ; GFX7-NEXT: s_mov_b32 s7, s3 ; GFX7-NEXT: buffer_load_ushort v0, off, s[4:7], 0 ; GFX7-NEXT: buffer_load_ushort v1, off, s[8:11], 0 -; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_bfe_i32 v2, v0, 0, 8 ; GFX7-NEXT: s_waitcnt vmcnt(0) @@ -2283,7 +2283,7 @@ ; 
GFX7-NEXT: v_bfe_i32 v0, v0, 8, 8 ; GFX7-NEXT: v_bfe_i32 v1, v1, 8, 8 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mad_i32_i24 v0, v1, v0, s4 +; GFX7-NEXT: v_mad_i32_i24 v0, v1, v0, s12 ; GFX7-NEXT: v_mad_i32_i24 v0, v3, v2, v0 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/idot4.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/idot4.ll +++ llvm/test/CodeGen/AMDGPU/idot4.ll @@ -9,31 +9,31 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_movk_i32 s8, 0xff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_movk_i32 s5, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_and_b32 s8, s5, s8 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v0, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_u32 s12, s5, 0x80010 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 +; GFX7-NEXT: s_and_b32 s6, s2, s5 +; GFX7-NEXT: s_and_b32 s5, s3, s5 +; GFX7-NEXT: s_bfe_u32 s8, s3, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: s_bfe_u32 s10, s3, 0x80010 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v1 +; GFX7-NEXT: s_bfe_u32 s7, s2, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: s_bfe_u32 s9, s2, 0x80010 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s10 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 +; GFX7-NEXT: s_lshr_b32 s3, s3, 24 ; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s12 -; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v1, v0 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_lshr_b32 s2, s2, 24 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -41,31 +41,31 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX8-NEXT: v_mad_u32_u24 v0, s6, v0, v1 -; GFX8-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v1, s8 -; GFX8-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX8-NEXT: v_mad_u32_u24 v0, s7, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s10 -; GFX8-NEXT: s_lshr_b32 s4, s4, 24 -; GFX8-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX8-NEXT: s_lshr_b32 s3, s3, 24 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword 
s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_movk_i32 s1, 0xff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s5, s2, s1 +; GFX8-NEXT: s_and_b32 s1, s3, s1 +; GFX8-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX8-NEXT: v_mov_b32_e32 v2, s1 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX8-NEXT: v_mad_u32_u24 v2, s5, v2, v3 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: v_mov_b32_e32 v3, s6 +; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s8 +; GFX8-NEXT: s_lshr_b32 s3, s3, 24 +; GFX8-NEXT: v_mad_u32_u24 v2, s7, v3, v2 +; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -73,31 +73,31 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s6, v0, v1 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-NODL-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s7, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 24 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_movk_i32 s1, 0xff +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_and_b32 s5, s2, s1 +; GFX9-NODL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NODL-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v2, v3 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s6 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v3, v2 +; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -162,31 +162,31 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff +; GFX7-NEXT: s_movk_i32 s9, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: 
buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 -; GFX7-NEXT: s_and_b32 s6, s5, s8 -; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mov_b32_e32 v3, s10 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 +; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 +; GFX7-NEXT: s_bfe_u32 s2, s5, 0x80008 +; GFX7-NEXT: s_bfe_u32 s3, s5, 0x80010 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: s_lshr_b32 s2, s5, 24 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ushort v3, off, s[0:3], 0 +; GFX7-NEXT: s_and_b32 s5, s5, s9 +; GFX7-NEXT: s_bfe_u32 s7, s4, 0x80010 +; GFX7-NEXT: s_lshr_b32 s8, s4, 24 +; GFX7-NEXT: s_and_b32 s4, s4, s9 +; GFX7-NEXT: v_mov_b32_e32 v4, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mad_u32_u24 v3, s4, v4, v3 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v3 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s8, v2, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -195,30 +195,30 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v3, s0 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: s_bfe_u32 s4, s1, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v4, s5 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX8-NEXT: s_lshr_b32 s2, s2, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s7 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: s_lshr_b32 s5, s3, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: s_movk_i32 s5, 0xff +; GFX8-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX8-NEXT: s_and_b32 s3, s3, s5 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_bfe_u32 s1, s2, 0x80010 +; GFX8-NEXT: s_lshr_b32 s4, s2, 24 +; GFX8-NEXT: s_and_b32 s2, s2, s5 +; GFX8-NEXT: v_mov_b32_e32 v6, s3 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s6, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s2 -; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v6, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v4, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s4, v5, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -227,30 +227,30 @@ ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_movk_i32 s0, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 -; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: s_bfe_u32 s4, s1, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s7 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: s_lshr_b32 s5, s3, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NODL-NEXT: s_movk_i32 s5, 0xff +; GFX9-NODL-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s5 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NODL-NEXT: s_bfe_u32 s1, s2, 0x80010 +; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s5 +; GFX9-NODL-NEXT: v_mov_b32_e32 v6, s3 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s6, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v6, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v4, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v5, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -315,31 +315,31 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff +; GFX7-NEXT: s_movk_i32 s9, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 -; GFX7-NEXT: s_and_b32 s6, s5, s8 -; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mov_b32_e32 v3, s10 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 +; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 +; GFX7-NEXT: s_bfe_u32 s2, s5, 0x80008 +; GFX7-NEXT: s_bfe_u32 s3, s5, 0x80010 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: s_lshr_b32 s2, s5, 24 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 +; GFX7-NEXT: s_and_b32 s5, s5, s9 +; GFX7-NEXT: s_bfe_u32 s7, s4, 0x80010 +; GFX7-NEXT: s_lshr_b32 s8, s4, 24 +; GFX7-NEXT: s_and_b32 s4, s4, s9 +; GFX7-NEXT: v_mov_b32_e32 v4, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mad_u32_u24 v3, s4, v4, v3 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v3 ; GFX7-NEXT: v_mad_u32_u24 
v0, s7, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s8, v2, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -347,31 +347,31 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX8-NEXT: s_and_b32 s3, s1, s2 -; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 -; GFX8-NEXT: s_and_b32 s2, s0, s2 -; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v4, s5 -; GFX8-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 -; GFX8-NEXT: s_lshr_b32 s0, s0, 24 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s7, v5, v2 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: s_lshr_b32 s5, s3, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: s_movk_i32 s5, 0xff +; GFX8-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX8-NEXT: s_and_b32 s3, s3, s5 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_bfe_u32 s1, s2, 0x80010 +; GFX8-NEXT: s_lshr_b32 s4, s2, 24 +; GFX8-NEXT: s_and_b32 s2, s2, s5 +; GFX8-NEXT: v_mov_b32_e32 v6, s3 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v6, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v4, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s4, v5, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -379,31 +379,31 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s5, s1, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s2, s0, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-NODL-NEXT: s_lshr_b32 s0, s0, 24 -; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: s_lshr_b32 s5, s3, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NODL-NEXT: s_movk_i32 s5, 0xff +; 
GFX9-NODL-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s5 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NODL-NEXT: s_bfe_u32 s1, s2, 0x80010 +; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s5 +; GFX9-NODL-NEXT: v_mov_b32_e32 v6, s3 +; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v6, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v4, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v5, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -462,22 +462,22 @@ ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 +; GFX7-NEXT: s_movk_i32 s7, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_bfe_u32 s4, s4, 0x80008 -; GFX7-NEXT: s_and_b32 s6, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_u32 s5, s5, 0x80008 +; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 +; GFX7-NEXT: s_bfe_u32 s2, s5, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 +; GFX7-NEXT: s_and_b32 s5, s5, s7 +; GFX7-NEXT: s_and_b32 s4, s4, s7 +; GFX7-NEXT: v_mov_b32_e32 v2, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v1, s4, v2, v1 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v1 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -485,22 +485,22 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s1, s2 -; GFX8-NEXT: s_and_b32 s2, s0, s2 -; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_u32 s1, s1, 0x80008 -; GFX8-NEXT: s_bfe_u32 s0, s0, 0x80008 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: s_bfe_u32 s1, s3, 0x80008 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: s_movk_i32 s1, 0xff +; GFX8-NEXT: s_and_b32 s3, s3, s1 +; GFX8-NEXT: s_and_b32 s1, s2, s1 +; GFX8-NEXT: v_mov_b32_e32 v4, s3 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v4, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm @@ -509,22 +509,22 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: 
v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s2 -; GFX9-NODL-NEXT: s_and_b32 s2, s0, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_u32 s1, s1, 0x80008 -; GFX9-NODL-NEXT: s_bfe_u32 s0, s0, 0x80008 -; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s1, s3, 0x80008 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: s_movk_i32 s1, 0xff +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s1 +; GFX9-NODL-NEXT: s_and_b32 s1, s2, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v4, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm @@ -533,22 +533,22 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_movk_i32 s2, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s3, s1, s2 -; GFX9-DL-NEXT: s_and_b32 s2, s0, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-DL-NEXT: s_bfe_u32 s1, s1, 0x80008 -; GFX9-DL-NEXT: s_bfe_u32 s0, s0, 0x80008 -; GFX9-DL-NEXT: s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-DL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-DL-NEXT: s_bfe_u32 s1, s3, 0x80008 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-DL-NEXT: s_movk_i32 s1, 0xff +; GFX9-DL-NEXT: s_and_b32 s3, s3, s1 +; GFX9-DL-NEXT: s_and_b32 s1, s2, s1 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-DL-NEXT: s_waitcnt vmcnt(0) +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v4, v2 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm @@ -580,27 +580,27 @@ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff +; GFX7-NEXT: s_movk_i32 s10, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 +; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s6, s4, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_and_b32 s7, s5, s8 -; GFX7-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX7-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX7-NEXT: s_bfe_u32 s9, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: s_bfe_u32 s11, s5, 0x80010 +; GFX7-NEXT: s_and_b32 s11, s4, s10 +; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 +; GFX7-NEXT: s_and_b32 s10, s5, s10 +; GFX7-NEXT: v_mov_b32_e32 v1, s11 +; GFX7-NEXT: s_bfe_u32 s8, s4, 0x80010 +; GFX7-NEXT: s_bfe_u32 s7, s5, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: s_bfe_u32 s9, s5, 0x80010 ; GFX7-NEXT: s_lshr_b32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v3, s10 +; GFX7-NEXT: v_mov_b32_e32 v3, s8 ; GFX7-NEXT: s_lshr_b32 s5, s5, 24 ; GFX7-NEXT: 
s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s10, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s9, v3, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s4 ; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 @@ -611,30 +611,30 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: s_movk_i32 s5, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s3, s1, s0 -; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX8-NEXT: s_bfe_u32 s4, s2, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v4, s5 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 +; GFX8-NEXT: s_and_b32 s6, s2, s5 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: s_and_b32 s5, s3, s5 +; GFX8-NEXT: v_mov_b32_e32 v4, s6 +; GFX8-NEXT: s_bfe_u32 s4, s2, 0x80010 +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX8-NEXT: s_bfe_u32 s0, s3, 0x80010 ; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, s4 +; GFX8-NEXT: s_lshr_b32 s3, s3, 24 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s5, v4, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v5, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s2 +; GFX8-NEXT: v_mad_u32_u24 v2, s3, v3, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -643,30 +643,30 @@ ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_movk_i32 s0, 0xff +; GFX9-NODL-NEXT: s_movk_i32 s5, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s5, s1, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s0, s2, s0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX9-NODL-NEXT: s_bfe_u32 s4, s2, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-NODL-NEXT: s_and_b32 s6, s2, s5 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s5, s3, s5 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s6 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s2, 0x80010 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s1, s3, 0x80008 +; 
GFX9-NODL-NEXT: s_bfe_u32 s0, s3, 0x80010 ; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s4 +; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v4, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v5, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v3, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -725,29 +725,29 @@ ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff +; GFX7-NEXT: s_movk_i32 s10, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s6, s4, s8 -; GFX7-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX7-NEXT: s_and_b32 s7, s5, s8 -; GFX7-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX7-NEXT: s_bfe_u32 s9, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v1, s8 -; GFX7-NEXT: v_mov_b32_e32 v2, s6 -; GFX7-NEXT: s_bfe_u32 s11, s5, 0x80010 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v3, s10 +; GFX7-NEXT: s_bfe_u32 s2, s4, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 +; GFX7-NEXT: s_bfe_u32 s6, s5, 0x80008 +; GFX7-NEXT: s_bfe_u32 s7, s4, 0x80010 +; GFX7-NEXT: s_lshr_b32 s9, s4, 24 +; GFX7-NEXT: s_and_b32 s4, s4, s10 +; GFX7-NEXT: s_and_b32 s10, s5, s10 +; GFX7-NEXT: v_mov_b32_e32 v2, s4 +; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80010 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 ; GFX7-NEXT: s_lshr_b32 s5, s5, 24 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v1 +; GFX7-NEXT: v_mad_u32_u24 v0, s10, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s8, v3, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s9 ; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -757,30 +757,30 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s4, s1, 0x80008 -; GFX8-NEXT: s_and_b32 s3, s2, s0 -; GFX8-NEXT: s_and_b32 s0, s1, s0 -; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: s_movk_i32 s6, 0xff +; GFX8-NEXT: v_mov_b32_e32 v3, s0 +; GFX8-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80010 +; GFX8-NEXT: s_lshr_b32 s5, s2, 24 +; GFX8-NEXT: s_and_b32 s2, s2, s6 ; GFX8-NEXT: v_mov_b32_e32 
v4, s0 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 -; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX8-NEXT: s_lshr_b32 s0, s3, 24 +; GFX8-NEXT: s_and_b32 s3, s3, s6 +; GFX8-NEXT: v_mov_b32_e32 v5, s2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s5, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v4, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s3, v5, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s4, v4, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s5 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -789,30 +789,30 @@ ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_movk_i32 s0, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s4, s1, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s3, s2, s0 -; GFX9-NODL-NEXT: s_and_b32 s0, s1, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: s_movk_i32 s6, 0xff +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80010 +; GFX9-NODL-NEXT: s_lshr_b32 s5, s2, 24 +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s6 ; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s0 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-NODL-NEXT: s_lshr_b32 s0, s3, 24 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s6 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s2 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v4, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v5, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v4, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -821,30 +821,30 @@ ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-DL-NEXT: s_movk_i32 s0, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_bfe_u32 s4, s1, 0x80008 -; GFX9-DL-NEXT: s_and_b32 s3, s2, s0 -; GFX9-DL-NEXT: s_and_b32 s0, s1, s0 -; 
GFX9-DL-NEXT: s_bfe_u32 s5, s2, 0x80008 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-DL-NEXT: s_bfe_u32 s6, s1, 0x80010 +; GFX9-DL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-DL-NEXT: s_movk_i32 s6, 0xff +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s0 +; GFX9-DL-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX9-DL-NEXT: s_bfe_u32 s0, s2, 0x80010 +; GFX9-DL-NEXT: s_lshr_b32 s5, s2, 24 +; GFX9-DL-NEXT: s_and_b32 s2, s2, s6 ; GFX9-DL-NEXT: v_mov_b32_e32 v4, s0 -; GFX9-DL-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX9-DL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-DL-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-DL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-DL-NEXT: s_lshr_b32 s0, s3, 24 +; GFX9-DL-NEXT: s_and_b32 s3, s3, s6 +; GFX9-DL-NEXT: v_mov_b32_e32 v5, s2 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s5, v3, v2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s3, v4, v2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s3, v5, v2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v4, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -884,32 +884,32 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_movk_i32 s8, 0xff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_movk_i32 s5, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_and_b32 s8, s5, s8 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v0, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 -; GFX7-NEXT: v_mad_u32_u24 v1, s7, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v2, s10 -; GFX7-NEXT: s_bfe_u32 s12, s5, 0x80010 -; GFX7-NEXT: v_mad_u32_u24 v1, s9, v2, v1 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s12 -; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v1, v0 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: s_and_b32 s6, s2, s5 +; GFX7-NEXT: s_and_b32 s5, s3, s5 +; GFX7-NEXT: s_bfe_u32 s8, s3, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: s_bfe_u32 s7, s2, 0x80008 +; GFX7-NEXT: v_mad_u32_u24 v1, s6, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_bfe_u32 s10, s3, 0x80010 +; GFX7-NEXT: v_mad_u32_u24 v1, s7, v2, v1 +; GFX7-NEXT: s_bfe_u32 s9, s2, 0x80010 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s10 +; GFX7-NEXT: s_lshr_b32 s3, s3, 24 +; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_lshr_b32 s2, s2, 24 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -917,32 +917,32 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 
s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX8-NEXT: v_mad_u32_u24 v1, s6, v0, v1 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 -; GFX8-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX8-NEXT: v_mad_u32_u24 v1, s7, v2, v1 -; GFX8-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX8-NEXT: v_mad_u32_u24 v0, s6, v0, v1 -; GFX8-NEXT: v_mov_b32_e32 v1, s10 -; GFX8-NEXT: s_lshr_b32 s4, s4, 24 -; GFX8-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX8-NEXT: s_lshr_b32 s3, s3, 24 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_movk_i32 s1, 0xff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s5, s2, s1 +; GFX8-NEXT: s_and_b32 s1, s3, s1 +; GFX8-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX8-NEXT: v_mov_b32_e32 v2, s1 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: v_mad_u32_u24 v3, s5, v2, v3 +; GFX8-NEXT: v_mov_b32_e32 v4, s6 +; GFX8-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX8-NEXT: v_mad_u32_u24 v3, s0, v4, v3 +; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX8-NEXT: v_mad_u32_u24 v2, s5, v2, v3 +; GFX8-NEXT: v_mov_b32_e32 v3, s8 +; GFX8-NEXT: s_lshr_b32 s3, s3, 24 +; GFX8-NEXT: v_mad_u32_u24 v2, s7, v3, v2 +; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -950,32 +950,32 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX9-NODL-NEXT: v_mad_u32_u24 v1, s6, v0, v1 -; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-NODL-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX9-NODL-NEXT: v_mad_u32_u24 v1, s7, v2, v1 -; GFX9-NODL-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s6, v0, v1 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 24 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_movk_i32 s1, 0xff +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: 
s_and_b32 s5, s2, s1 +; GFX9-NODL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NODL-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: v_mad_u32_u24 v3, s5, v2, v3 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s6 +; GFX9-NODL-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX9-NODL-NEXT: v_mad_u32_u24 v3, s0, v4, v3 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v2, v3 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v3, v2 +; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -983,32 +983,32 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_movk_i32 s2, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-DL-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX9-DL-NEXT: v_mad_u32_u24 v1, s6, v0, v1 -; GFX9-DL-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-DL-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX9-DL-NEXT: v_mad_u32_u24 v1, s7, v2, v1 -; GFX9-DL-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s6, v0, v1 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 24 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s9, v1, v0 -; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s3, v1, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_movk_i32 s1, 0xff +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_and_b32 s5, s2, s1 +; GFX9-DL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-DL-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s1 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-DL-NEXT: v_mad_u32_u24 v3, s5, v2, v3 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s6 +; GFX9-DL-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX9-DL-NEXT: v_mad_u32_u24 v3, s0, v4, v3 +; GFX9-DL-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s5, v2, v3 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s7, v3, v2 +; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -1057,33 +1057,33 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_movk_i32 s8, 0xff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; 
GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX7-NEXT: s_movk_i32 s5, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80008 -; GFX7-NEXT: s_and_b32 s8, s5, s8 -; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v0, s10 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v0, v1 -; GFX7-NEXT: s_bfe_u32 s12, s5, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: v_add_i32_e32 v1, vcc, s6, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v2, v0 -; GFX7-NEXT: v_mov_b32_e32 v2, s12 -; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v2, v0 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 +; GFX7-NEXT: s_and_b32 s6, s2, s5 +; GFX7-NEXT: s_bfe_u32 s8, s3, 0x80008 +; GFX7-NEXT: s_and_b32 s5, s3, s5 +; GFX7-NEXT: s_bfe_u32 s7, s2, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s8 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v0, v1 +; GFX7-NEXT: s_bfe_u32 s10, s3, 0x80010 ; GFX7-NEXT: v_mov_b32_e32 v2, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v2, v0 +; GFX7-NEXT: s_bfe_u32 s9, s2, 0x80010 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v2, v0 +; GFX7-NEXT: v_mov_b32_e32 v2, s10 +; GFX7-NEXT: s_lshr_b32 s3, s3, 24 +; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 +; GFX7-NEXT: v_mov_b32_e32 v2, s3 +; GFX7-NEXT: s_lshr_b32 s2, s2, 24 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v2, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1091,33 +1091,33 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s3, s2 -; GFX8-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v0, s8 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX8-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v2, s2 -; GFX8-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, s5, v0 -; GFX8-NEXT: v_mad_u32_u24 v0, s6, v2, v0 -; GFX8-NEXT: v_mov_b32_e32 v2, s10 -; GFX8-NEXT: s_lshr_b32 s4, s4, 24 -; GFX8-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX8-NEXT: s_lshr_b32 s3, s3, 24 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: v_mad_u32_u24 v0, s3, v2, v0 -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_movk_i32 s1, 0xff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX8-NEXT: s_and_b32 s5, s2, s1 +; GFX8-NEXT: s_and_b32 s1, s3, s1 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: v_mov_b32_e32 v2, s6 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v2, v3 +; GFX8-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX8-NEXT: v_mov_b32_e32 v4, s1 +; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX8-NEXT: v_add_u32_e32 v3, vcc, s4, v2 +; 
GFX8-NEXT: v_mad_u32_u24 v2, s5, v4, v2 +; GFX8-NEXT: v_mov_b32_e32 v4, s8 +; GFX8-NEXT: s_lshr_b32 s3, s3, 24 +; GFX8-NEXT: v_mad_u32_u24 v2, s7, v4, v2 +; GFX8-NEXT: s_lshr_b32 s2, s2, 24 +; GFX8-NEXT: v_mov_b32_e32 v4, s3 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v4, v2 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1125,33 +1125,33 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s8 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX9-NODL-NEXT: s_bfe_u32 s10, s4, 0x80010 -; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX9-NODL-NEXT: v_add_u32_e32 v1, s5, v0 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s6, v2, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s10 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s4, 24 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s3, v2, v0 -; GFX9-NODL-NEXT: v_add_u32_e32 v2, v0, v1 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_movk_i32 s1, 0xff +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s5, s2, s1 +; GFX9-NODL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v2, v3 +; GFX9-NODL-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX9-NODL-NEXT: v_add_u32_e32 v3, s4, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v4, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v4, v2 +; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v4, v2 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, v2, v3 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1159,33 +1159,33 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_movk_i32 s2, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_and_b32 s6, s3, s2 -; GFX9-DL-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: s_bfe_u32 s7, s3, 0x80008 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s8 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s7, v0, v1 -; GFX9-DL-NEXT: s_bfe_u32 s10, 
s4, 0x80010 -; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-DL-NEXT: s_bfe_u32 s9, s3, 0x80010 -; GFX9-DL-NEXT: v_add_u32_e32 v1, s5, v0 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s6, v2, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v2, s10 -; GFX9-DL-NEXT: s_lshr_b32 s4, s4, 24 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24 -; GFX9-DL-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s3, v2, v0 -; GFX9-DL-NEXT: v_add_u32_e32 v2, v0, v1 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_movk_i32 s1, 0xff +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_bfe_u32 s6, s3, 0x80008 +; GFX9-DL-NEXT: s_and_b32 s5, s2, s1 +; GFX9-DL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-DL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v2, v3 +; GFX9-DL-NEXT: s_bfe_u32 s8, s3, 0x80010 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s1 +; GFX9-DL-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX9-DL-NEXT: v_add_u32_e32 v3, s4, v2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s5, v4, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s7, v4, v2 +; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v4, v2 +; GFX9-DL-NEXT: v_add_u32_e32 v2, v2, v3 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -1236,29 +1236,29 @@ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_mov_b32 s8, 0xffff +; GFX7-NEXT: s_mov_b32 s12, 0xffff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 +; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_sext_i32_i8 s6, s4 -; GFX7-NEXT: s_bfe_u32 s10, s4, 0x80008 ; GFX7-NEXT: s_sext_i32_i8 s7, s5 -; GFX7-NEXT: s_bfe_u32 s9, s5, 0x80008 -; GFX7-NEXT: s_and_b32 s7, s7, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: s_bfe_u32 s11, s5, 0x80010 -; GFX7-NEXT: s_and_b32 s6, s6, s8 +; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 +; GFX7-NEXT: s_and_b32 s7, s7, s12 +; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 +; GFX7-NEXT: s_and_b32 s6, s6, s12 ; GFX7-NEXT: v_mov_b32_e32 v3, s7 -; GFX7-NEXT: s_bfe_u32 s12, s4, 0x80010 +; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 ; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mov_b32_e32 v2, s11 +; GFX7-NEXT: v_mov_b32_e32 v2, s10 ; GFX7-NEXT: s_lshr_b32 s4, s4, 24 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s10, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s12, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s11, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -1268,33 +1268,33 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword 
s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_i32 s3, s0, 0x80000 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80008 -; GFX8-NEXT: s_bfe_i32 s4, s1, 0x80000 -; GFX8-NEXT: s_and_b32 s3, s2, s3 -; GFX8-NEXT: s_and_b32 s2, s2, s4 -; GFX8-NEXT: s_bfe_u32 s5, s0, 0x80008 -; GFX8-NEXT: v_mov_b32_e32 v3, s6 -; GFX8-NEXT: s_bfe_u32 s8, s1, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v5, s2 -; GFX8-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 -; GFX8-NEXT: s_lshr_b32 s0, s0, 24 +; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80000 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80008 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX8-NEXT: s_bfe_i32 s1, s3, 0x80000 +; GFX8-NEXT: s_lshr_b32 s3, s3, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, s3 +; GFX8-NEXT: s_mov_b32 s3, 0xffff +; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80008 +; GFX8-NEXT: s_and_b32 s1, s3, s1 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: s_bfe_u32 s6, s2, 0x80010 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_lshr_b32 s2, s2, 24 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_mad_u32_u24 v2, s5, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v5, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s7, v4, v2 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s6, v4, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v5, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1302,33 +1302,33 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_i32 s3, s0, 0x80000 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80008 -; GFX9-NODL-NEXT: s_bfe_i32 s4, s1, 0x80000 -; GFX9-NODL-NEXT: s_and_b32 s3, s2, s3 -; GFX9-NODL-NEXT: s_and_b32 s2, s2, s4 -; GFX9-NODL-NEXT: s_bfe_u32 s5, s0, 0x80008 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s6 -; GFX9-NODL-NEXT: s_bfe_u32 s8, s1, 0x80010 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-NODL-NEXT: s_lshr_b32 s0, s0, 24 +; GFX9-NODL-NEXT: s_bfe_i32 s0, s2, 0x80000 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s3, 0x80008 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-NODL-NEXT: s_bfe_i32 s1, s3, 0x80000 +; GFX9-NODL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s3 +; GFX9-NODL-NEXT: s_mov_b32 s3, 0xffff +; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80008 +; GFX9-NODL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: s_bfe_u32 s6, s2, 0x80010 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v5, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 
v2, s7, v4, v2 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s6, v4, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v5, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1336,33 +1336,33 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s2, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_bfe_i32 s3, s0, 0x80000 -; GFX9-DL-NEXT: s_bfe_u32 s6, s1, 0x80008 -; GFX9-DL-NEXT: s_bfe_i32 s4, s1, 0x80000 -; GFX9-DL-NEXT: s_and_b32 s3, s2, s3 -; GFX9-DL-NEXT: s_and_b32 s2, s2, s4 -; GFX9-DL-NEXT: s_bfe_u32 s5, s0, 0x80008 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s6 -; GFX9-DL-NEXT: s_bfe_u32 s8, s1, 0x80010 -; GFX9-DL-NEXT: v_mov_b32_e32 v5, s2 -; GFX9-DL-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX9-DL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-DL-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-DL-NEXT: s_lshr_b32 s0, s0, 24 +; GFX9-DL-NEXT: s_bfe_i32 s0, s2, 0x80000 +; GFX9-DL-NEXT: s_bfe_u32 s4, s3, 0x80008 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-DL-NEXT: s_bfe_i32 s1, s3, 0x80000 +; GFX9-DL-NEXT: s_lshr_b32 s3, s3, 24 +; GFX9-DL-NEXT: v_mov_b32_e32 v5, s3 +; GFX9-DL-NEXT: s_mov_b32 s3, 0xffff +; GFX9-DL-NEXT: s_bfe_u32 s5, s2, 0x80008 +; GFX9-DL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-DL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-DL-NEXT: s_bfe_u32 s6, s2, 0x80010 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s5, v3, v2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s3, v5, v2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s7, v4, v2 ; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s6, v4, v2 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s2, v5, v2 ; GFX9-DL-NEXT: global_store_short v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -1410,30 +1410,30 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i8 s7, s4 -; GFX7-NEXT: s_sext_i32_i8 s8, s5 -; GFX7-NEXT: s_bfe_i32 s10, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v0, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_i32 s12, s5, 0x80010 -; GFX7-NEXT: v_mad_i32_i24 v0, s7, v0, v1 -; GFX7-NEXT: s_bfe_i32 s9, s4, 0x80008 +; GFX7-NEXT: s_sext_i32_i8 s5, s2 +; GFX7-NEXT: s_sext_i32_i8 s6, s3 +; GFX7-NEXT: s_bfe_i32 s8, s3, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: s_bfe_i32 s10, s3, 0x80010 +; GFX7-NEXT: v_mad_i32_i24 v0, s5, v0, v1 +; GFX7-NEXT: s_bfe_i32 s7, s2, 0x80008 +; 
GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: s_bfe_i32 s9, s2, 0x80010 +; GFX7-NEXT: v_mad_i32_i24 v0, s7, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s10 -; GFX7-NEXT: s_bfe_i32 s11, s4, 0x80010 +; GFX7-NEXT: s_ashr_i32 s3, s3, 24 ; GFX7-NEXT: v_mad_i32_i24 v0, s9, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s12 -; GFX7-NEXT: s_ashr_i32 s5, s5, 24 -; GFX7-NEXT: v_mad_i32_i24 v0, s11, v1, v0 -; GFX7-NEXT: s_ashr_i32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_i32_i24 v0, s4, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_ashr_i32 s2, s2, 24 +; GFX7-NEXT: v_mad_i32_i24 v0, s2, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1561,39 +1561,39 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX7-NEXT: s_mov_b32 s12, 0xffff +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_sext_i32_i8 s4, s2 +; GFX7-NEXT: s_sext_i32_i8 s5, s3 +; GFX7-NEXT: s_bfe_i32 s6, s3, 0x80008 +; GFX7-NEXT: s_bfe_i32 s7, s2, 0x80008 +; GFX7-NEXT: s_bfe_i32 s8, s3, 0x80010 +; GFX7-NEXT: s_ashr_i32 s10, s3, 24 +; GFX7-NEXT: s_bfe_i32 s9, s2, 0x80010 +; GFX7-NEXT: s_ashr_i32 s11, s2, 24 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i8 s6, s4 -; GFX7-NEXT: s_bfe_i32 s9, s4, 0x80008 -; GFX7-NEXT: s_sext_i32_i8 s7, s5 -; GFX7-NEXT: s_bfe_i32 s10, s5, 0x80008 -; GFX7-NEXT: s_and_b32 s7, s7, s8 -; GFX7-NEXT: s_bfe_i32 s12, s5, 0x80010 -; GFX7-NEXT: s_and_b32 s10, s10, s8 -; GFX7-NEXT: s_and_b32 s6, s6, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s7 -; GFX7-NEXT: s_bfe_i32 s11, s4, 0x80010 -; GFX7-NEXT: s_ashr_i32 s5, s5, 24 -; GFX7-NEXT: s_and_b32 s12, s12, s8 -; GFX7-NEXT: s_and_b32 s9, s9, s8 -; GFX7-NEXT: v_mov_b32_e32 v2, s10 -; GFX7-NEXT: s_ashr_i32 s4, s4, 24 -; GFX7-NEXT: s_and_b32 s11, s11, s8 -; GFX7-NEXT: s_and_b32 s5, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v3, s12 -; GFX7-NEXT: s_and_b32 s4, s4, s8 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 +; GFX7-NEXT: s_and_b32 s5, s5, s12 +; GFX7-NEXT: s_and_b32 s6, s6, s12 +; GFX7-NEXT: s_and_b32 s4, s4, s12 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_and_b32 s8, s8, s12 +; GFX7-NEXT: s_and_b32 s7, s7, s12 +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: s_and_b32 s9, s9, s12 +; GFX7-NEXT: s_and_b32 s10, s10, s12 +; GFX7-NEXT: v_mov_b32_e32 v3, s8 +; GFX7-NEXT: s_and_b32 s11, s11, s12 +; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s9, v3, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s10 +; GFX7-NEXT: v_mad_u32_u24 v0, s11, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1602,38 +1602,38 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; 
GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_mov_b32 s0, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s5, s1, 8 -; GFX8-NEXT: s_lshr_b32 s6, s2, 8 -; GFX8-NEXT: s_sext_i32_i8 s4, s2 -; GFX8-NEXT: s_bfe_i32 s5, s5, 0x80000 -; GFX8-NEXT: s_bfe_i32 s6, s6, 0x80000 -; GFX8-NEXT: s_bfe_i32 s8, s2, 0x80010 -; GFX8-NEXT: s_lshr_b32 s2, s2, 24 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 -; GFX8-NEXT: s_sext_i32_i8 s3, s1 -; GFX8-NEXT: s_bfe_i32 s7, s1, 0x80010 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: s_and_b32 s4, s0, s5 -; GFX8-NEXT: s_and_b32 s5, s0, s6 +; GFX8-NEXT: s_lshr_b32 s0, s2, 8 +; GFX8-NEXT: s_lshr_b32 s1, s3, 8 +; GFX8-NEXT: s_lshr_b32 s6, s3, 24 +; GFX8-NEXT: s_sext_i32_i8 s4, s3 +; GFX8-NEXT: s_bfe_i32 s3, s3, 0x80010 +; GFX8-NEXT: v_mov_b32_e32 v4, s3 ; GFX8-NEXT: s_bfe_i32 s1, s1, 0x80000 -; GFX8-NEXT: s_bfe_i32 s2, s2, 0x80000 -; GFX8-NEXT: v_mov_b32_e32 v5, s5 -; GFX8-NEXT: s_and_b32 s1, s0, s1 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 -; GFX8-NEXT: s_and_b32 s0, s0, s2 +; GFX8-NEXT: s_mov_b32 s3, 0xffff +; GFX8-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_lshr_b32 s4, s2, 24 +; GFX8-NEXT: s_sext_i32_i8 s5, s2 +; GFX8-NEXT: s_and_b32 s1, s3, s1 +; GFX8-NEXT: s_bfe_i32 s4, s4, 0x80000 +; GFX8-NEXT: s_bfe_i32 s6, s6, 0x80000 +; GFX8-NEXT: s_and_b32 s0, s3, s0 +; GFX8-NEXT: s_and_b32 s4, s3, s4 +; GFX8-NEXT: s_bfe_i32 s2, s2, 0x80010 +; GFX8-NEXT: s_and_b32 s3, s3, s6 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_i32_i24 v2, s3, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v5, v2 -; GFX8-NEXT: v_mad_i32_i24 v2, s7, v4, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s0 -; GFX8-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s5, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s2, v4, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u32_u24 v2, s4, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1642,38 +1642,38 @@ ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_mov_b32 s0, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_lshr_b32 s5, s1, 8 -; GFX9-NODL-NEXT: s_lshr_b32 s6, s2, 8 -; GFX9-NODL-NEXT: s_sext_i32_i8 s4, s2 -; GFX9-NODL-NEXT: s_bfe_i32 s5, s5, 0x80000 -; GFX9-NODL-NEXT: s_bfe_i32 s6, s6, 0x80000 -; GFX9-NODL-NEXT: s_bfe_i32 s8, s2, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s2, s2, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-NODL-NEXT: s_sext_i32_i8 s3, s1 -; GFX9-NODL-NEXT: s_bfe_i32 s7, s1, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: s_and_b32 s4, s0, s5 -; GFX9-NODL-NEXT: s_and_b32 s5, s0, s6 +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 8 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 8 +; GFX9-NODL-NEXT: s_lshr_b32 s6, s3, 24 +; GFX9-NODL-NEXT: s_sext_i32_i8 s4, s3 +; GFX9-NODL-NEXT: s_bfe_i32 s3, s3, 0x80010 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s3 ; GFX9-NODL-NEXT: s_bfe_i32 s1, s1, 0x80000 -; GFX9-NODL-NEXT: 
s_bfe_i32 s2, s2, 0x80000 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-NODL-NEXT: s_and_b32 s1, s0, s1 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-NODL-NEXT: s_and_b32 s0, s0, s2 +; GFX9-NODL-NEXT: s_mov_b32 s3, 0xffff +; GFX9-NODL-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-NODL-NEXT: s_sext_i32_i8 s5, s2 +; GFX9-NODL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-NODL-NEXT: s_bfe_i32 s4, s4, 0x80000 +; GFX9-NODL-NEXT: s_bfe_i32 s6, s6, 0x80000 +; GFX9-NODL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-NODL-NEXT: s_and_b32 s4, s3, s4 +; GFX9-NODL-NEXT: s_bfe_i32 s2, s2, 0x80010 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s6 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s3, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v5, v2 -; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s7, v4, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s5, v3, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mad_i32_i24 v2, s2, v4, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v3, v2 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1682,38 +1682,38 @@ ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ushort v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-DL-NEXT: s_mov_b32 s0, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_lshr_b32 s5, s1, 8 -; GFX9-DL-NEXT: s_lshr_b32 s6, s2, 8 -; GFX9-DL-NEXT: s_sext_i32_i8 s4, s2 -; GFX9-DL-NEXT: s_bfe_i32 s5, s5, 0x80000 -; GFX9-DL-NEXT: s_bfe_i32 s6, s6, 0x80000 -; GFX9-DL-NEXT: s_bfe_i32 s8, s2, 0x80010 -; GFX9-DL-NEXT: s_lshr_b32 s2, s2, 24 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 -; GFX9-DL-NEXT: s_sext_i32_i8 s3, s1 -; GFX9-DL-NEXT: s_bfe_i32 s7, s1, 0x80010 -; GFX9-DL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-DL-NEXT: s_and_b32 s4, s0, s5 -; GFX9-DL-NEXT: s_and_b32 s5, s0, s6 +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 8 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 8 +; GFX9-DL-NEXT: s_lshr_b32 s6, s3, 24 +; GFX9-DL-NEXT: s_sext_i32_i8 s4, s3 +; GFX9-DL-NEXT: s_bfe_i32 s3, s3, 0x80010 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s3 ; GFX9-DL-NEXT: s_bfe_i32 s1, s1, 0x80000 -; GFX9-DL-NEXT: s_bfe_i32 s2, s2, 0x80000 -; GFX9-DL-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-DL-NEXT: s_and_b32 s1, s0, s1 -; GFX9-DL-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-DL-NEXT: s_and_b32 s0, s0, s2 +; GFX9-DL-NEXT: s_mov_b32 s3, 0xffff +; GFX9-DL-NEXT: s_bfe_i32 s0, s0, 0x80000 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-DL-NEXT: s_sext_i32_i8 s5, s2 +; GFX9-DL-NEXT: s_and_b32 s1, s3, s1 +; GFX9-DL-NEXT: s_bfe_i32 s4, s4, 0x80000 +; GFX9-DL-NEXT: s_bfe_i32 s6, s6, 0x80000 +; GFX9-DL-NEXT: s_and_b32 s0, s3, s0 +; GFX9-DL-NEXT: s_and_b32 s4, s3, s4 +; GFX9-DL-NEXT: s_bfe_i32 s2, s2, 0x80010 +; GFX9-DL-NEXT: s_and_b32 s3, s3, s6 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s3, v3, v2 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v5, v2 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s7, v4, v2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-DL-NEXT: 
v_mad_u32_u24 v2, s1, v3, v2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s5, v3, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s2, v4, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s4, v3, v2 ; GFX9-DL-NEXT: global_store_short v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -1760,31 +1760,31 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff +; GFX7-NEXT: s_movk_i32 s9, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s7, s4, s8 -; GFX7-NEXT: s_bfe_u32 s9, s4, 0x80008 -; GFX7-NEXT: s_and_b32 s6, s5, s8 -; GFX7-NEXT: s_bfe_u32 s8, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_lshr_b32 s5, s5, 24 -; GFX7-NEXT: v_mov_b32_e32 v3, s10 -; GFX7-NEXT: s_lshr_b32 s4, s4, 24 +; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 +; GFX7-NEXT: s_bfe_u32 s2, s5, 0x80008 +; GFX7-NEXT: s_bfe_u32 s3, s5, 0x80010 +; GFX7-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-NEXT: s_lshr_b32 s2, s5, 24 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 +; GFX7-NEXT: s_and_b32 s5, s5, s9 +; GFX7-NEXT: s_bfe_u32 s7, s4, 0x80010 +; GFX7-NEXT: s_lshr_b32 s8, s4, 24 +; GFX7-NEXT: s_and_b32 s4, s4, s9 +; GFX7-NEXT: v_mov_b32_e32 v4, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_mad_u32_u24 v3, s4, v4, v3 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v0, v3 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v3, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s8, v2, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -1792,31 +1792,31 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX8-NEXT: s_and_b32 s3, s1, s2 -; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80008 -; GFX8-NEXT: s_and_b32 s2, s0, s2 -; GFX8-NEXT: v_mov_b32_e32 v3, s3 -; GFX8-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v4, s5 -; GFX8-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX8-NEXT: s_lshr_b32 s1, s1, 24 -; GFX8-NEXT: v_mov_b32_e32 v5, s6 -; GFX8-NEXT: s_lshr_b32 s0, s0, 24 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s7, v5, v2 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX8-NEXT: s_lshr_b32 s5, s3, 24 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: s_movk_i32 s5, 0xff +; GFX8-NEXT: 
s_bfe_u32 s1, s3, 0x80008 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX8-NEXT: s_and_b32 s3, s3, s5 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_bfe_u32 s1, s2, 0x80010 +; GFX8-NEXT: s_lshr_b32 s4, s2, 24 +; GFX8-NEXT: s_and_b32 s2, s2, s5 +; GFX8-NEXT: v_mov_b32_e32 v6, s3 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v6, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s1, v4, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s4, v5, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -1824,31 +1824,31 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NODL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NODL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_bfe_u32 s4, s0, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s3, s1, s2 -; GFX9-NODL-NEXT: s_bfe_u32 s5, s1, 0x80008 -; GFX9-NODL-NEXT: s_and_b32 s2, s0, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NODL-NEXT: s_bfe_u32 s6, s1, 0x80010 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s7, s0, 0x80010 -; GFX9-NODL-NEXT: s_lshr_b32 s1, s1, 24 -; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-NODL-NEXT: s_lshr_b32 s0, s0, 24 -; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v3, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v4, v2 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s7, v5, v2 +; GFX9-NODL-NEXT: s_bfe_u32 s0, s2, 0x80008 +; GFX9-NODL-NEXT: s_lshr_b32 s5, s3, 24 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NODL-NEXT: s_movk_i32 s5, 0xff +; GFX9-NODL-NEXT: s_bfe_u32 s1, s3, 0x80008 +; GFX9-NODL-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s5 ; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NODL-NEXT: s_bfe_u32 s1, s2, 0x80010 +; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s5 +; GFX9-NODL-NEXT: v_mov_b32_e32 v6, s3 +; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s2, v6, v2 ; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s1, v4, v2 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s4, v5, v2 ; GFX9-NODL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -1904,31 +1904,31 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i8 s7, s4 -; GFX7-NEXT: s_sext_i32_i8 s8, s5 -; GFX7-NEXT: s_bfe_i32 s10, s5, 0x80008 -; GFX7-NEXT: v_mov_b32_e32 v0, s8 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: s_bfe_i32 s9, s4, 0x80008 -; GFX7-NEXT: v_mad_i32_i24 v1, s7, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v2, s10 -; GFX7-NEXT: s_bfe_i32 s12, s5, 
0x80010 -; GFX7-NEXT: v_mad_i32_i24 v1, s9, v2, v1 -; GFX7-NEXT: s_bfe_i32 s11, s4, 0x80010 -; GFX7-NEXT: v_mad_i32_i24 v0, s7, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s12 -; GFX7-NEXT: s_ashr_i32 s5, s5, 24 -; GFX7-NEXT: v_mad_i32_i24 v0, s11, v1, v0 -; GFX7-NEXT: s_ashr_i32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mad_i32_i24 v0, s4, v1, v0 +; GFX7-NEXT: s_sext_i32_i8 s5, s2 +; GFX7-NEXT: s_sext_i32_i8 s6, s3 +; GFX7-NEXT: s_bfe_i32 s8, s3, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: s_bfe_i32 s7, s2, 0x80008 +; GFX7-NEXT: v_mad_i32_i24 v1, s5, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_bfe_i32 s10, s3, 0x80010 +; GFX7-NEXT: v_mad_i32_i24 v1, s7, v2, v1 +; GFX7-NEXT: s_bfe_i32 s9, s2, 0x80010 +; GFX7-NEXT: v_mad_i32_i24 v0, s5, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s10 +; GFX7-NEXT: s_ashr_i32 s3, s3, 24 +; GFX7-NEXT: v_mad_i32_i24 v0, s9, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: s_ashr_i32 s2, s2, 24 +; GFX7-NEXT: v_mad_i32_i24 v0, s2, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -2074,31 +2074,31 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_movk_i32 s12, 0xff -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_movk_i32 s10, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s7, s4, 24 -; GFX7-NEXT: s_lshr_b32 s9, s5, 24 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80008 -; GFX7-NEXT: s_bfe_u32 s13, s5, 0x80010 -; GFX7-NEXT: s_and_b32 s5, s5, s12 -; GFX7-NEXT: s_bfe_u32 s8, s4, 0x80008 -; GFX7-NEXT: s_bfe_u32 s11, s4, 0x80010 -; GFX7-NEXT: s_and_b32 s4, s4, s12 -; GFX7-NEXT: v_mov_b32_e32 v0, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s10 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s13 -; GFX7-NEXT: v_mad_u32_u24 v0, s11, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 +; GFX7-NEXT: s_lshr_b32 s5, s2, 24 +; GFX7-NEXT: s_lshr_b32 s7, s3, 24 +; GFX7-NEXT: s_bfe_u32 s8, s3, 0x80008 +; GFX7-NEXT: s_bfe_u32 s11, s3, 0x80010 +; GFX7-NEXT: s_and_b32 s3, s3, s10 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: s_bfe_u32 s6, s2, 0x80008 +; GFX7-NEXT: s_bfe_u32 s9, s2, 0x80010 +; GFX7-NEXT: s_and_b32 s2, s2, s10 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s11 +; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s7 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -2106,30 +2106,30 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s4, 
s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s6, s3, 24 -; GFX8-NEXT: s_bfe_u32 s8, s3, 0x80010 -; GFX8-NEXT: v_lshrrev_b16_e64 v0, 8, s3 -; GFX8-NEXT: s_and_b32 s3, s3, s2 -; GFX8-NEXT: s_and_b32 s2, s4, s2 -; GFX8-NEXT: v_mov_b32_e32 v2, s2 -; GFX8-NEXT: v_mov_b32_e32 v3, s5 -; GFX8-NEXT: s_bfe_u32 s9, s4, 0x80010 -; GFX8-NEXT: v_lshrrev_b16_e64 v1, 8, s4 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v2, v3 -; GFX8-NEXT: v_mad_u32_u24 v0, v0, v1, v2 -; GFX8-NEXT: v_mov_b32_e32 v1, s9 -; GFX8-NEXT: s_lshr_b32 s7, s4, 24 -; GFX8-NEXT: v_mad_u32_u24 v0, s8, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s7 -; GFX8-NEXT: v_mad_u32_u24 v2, s6, v1, v0 +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: s_movk_i32 s6, 0xff ; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s1, s3, 24 +; GFX8-NEXT: s_bfe_u32 s7, s3, 0x80010 +; GFX8-NEXT: v_lshrrev_b16_e64 v3, 8, s3 +; GFX8-NEXT: s_and_b32 s3, s3, s6 +; GFX8-NEXT: s_lshr_b32 s0, s2, 24 +; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80010 +; GFX8-NEXT: v_lshrrev_b16_e64 v2, 8, s2 +; GFX8-NEXT: s_and_b32 s2, s2, s6 +; GFX8-NEXT: v_mov_b32_e32 v4, s3 +; GFX8-NEXT: v_mov_b32_e32 v5, s4 +; GFX8-NEXT: v_mad_u32_u24 v4, s2, v4, v5 +; GFX8-NEXT: v_mad_u32_u24 v2, v2, v3, v4 +; GFX8-NEXT: v_mov_b32_e32 v3, s7 +; GFX8-NEXT: v_mad_u32_u24 v2, s5, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -2137,30 +2137,30 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: s_movk_i32 s2, 0xff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NODL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_load_dword s5, s[0:1], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_lshr_b32 s6, s3, 24 -; GFX9-NODL-NEXT: s_bfe_u32 s8, s3, 0x80010 -; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v0, 8, s3 -; GFX9-NODL-NEXT: s_and_b32 s3, s3, s2 -; GFX9-NODL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-NODL-NEXT: s_bfe_u32 s9, s4, 0x80010 -; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v1, 8, s4 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s3, v2, v3 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, v0, v1, v2 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s9 -; GFX9-NODL-NEXT: s_lshr_b32 s7, s4, 24 -; GFX9-NODL-NEXT: v_mad_u32_u24 v0, s8, v1, v0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s6, v1, v0 +; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NODL-NEXT: s_movk_i32 s6, 0xff ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 24 +; GFX9-NODL-NEXT: s_bfe_u32 s7, s3, 0x80010 +; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v3, 8, s3 +; GFX9-NODL-NEXT: s_and_b32 s3, s3, s6 +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 24 +; GFX9-NODL-NEXT: s_bfe_u32 s5, s2, 0x80010 +; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v2, 8, s2 +; GFX9-NODL-NEXT: s_and_b32 s2, s2, s6 +; GFX9-NODL-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, s4 +; GFX9-NODL-NEXT: v_mad_u32_u24 v4, s2, v4, v5 
+; GFX9-NODL-NEXT: v_mad_u32_u24 v2, v2, v3, v4 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s5, v3, v2 +; GFX9-NODL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-NODL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-NODL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -2168,30 +2168,30 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_movk_i32 s2, 0xff -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s5, s[0:1], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_lshr_b32 s6, s3, 24 -; GFX9-DL-NEXT: s_bfe_u32 s8, s3, 0x80010 -; GFX9-DL-NEXT: v_lshrrev_b16_e64 v0, 8, s3 -; GFX9-DL-NEXT: s_and_b32 s3, s3, s2 -; GFX9-DL-NEXT: s_and_b32 s2, s4, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s5 -; GFX9-DL-NEXT: s_bfe_u32 s9, s4, 0x80010 -; GFX9-DL-NEXT: v_lshrrev_b16_e64 v1, 8, s4 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s3, v2, v3 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, v0, v1, v2 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s9 -; GFX9-DL-NEXT: s_lshr_b32 s7, s4, 24 -; GFX9-DL-NEXT: v_mad_u32_u24 v0, s8, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-DL-NEXT: v_mad_u32_u24 v2, s6, v1, v0 +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-DL-NEXT: s_movk_i32 s6, 0xff ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 24 +; GFX9-DL-NEXT: s_bfe_u32 s7, s3, 0x80010 +; GFX9-DL-NEXT: v_lshrrev_b16_e64 v3, 8, s3 +; GFX9-DL-NEXT: s_and_b32 s3, s3, s6 +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 24 +; GFX9-DL-NEXT: s_bfe_u32 s5, s2, 0x80010 +; GFX9-DL-NEXT: v_lshrrev_b16_e64 v2, 8, s2 +; GFX9-DL-NEXT: s_and_b32 s2, s2, s6 +; GFX9-DL-NEXT: v_mov_b32_e32 v4, s3 +; GFX9-DL-NEXT: v_mov_b32_e32 v5, s4 +; GFX9-DL-NEXT: v_mad_u32_u24 v4, s2, v4, v5 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, v2, v3, v4 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s7 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s5, v3, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 +; GFX9-DL-NEXT: v_mad_u32_u24 v2, s0, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -2225,30 +2225,30 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_ashr_i32 s7, s4, 24 -; GFX7-NEXT: s_ashr_i32 s10, s5, 24 -; GFX7-NEXT: s_bfe_i32 s11, s5, 0x80010 -; GFX7-NEXT: s_bfe_i32 s12, s5, 0x80008 -; GFX7-NEXT: s_sext_i32_i8 s5, s5 -; GFX7-NEXT: s_bfe_i32 s8, s4, 0x80010 -; GFX7-NEXT: s_bfe_i32 s9, s4, 0x80008 -; GFX7-NEXT: s_sext_i32_i8 s4, s4 -; GFX7-NEXT: v_mov_b32_e32 v0, s5 -; GFX7-NEXT: v_mov_b32_e32 v1, s6 -; GFX7-NEXT: v_mad_i32_i24 v0, s4, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s12 -; GFX7-NEXT: v_mad_i32_i24 v0, s9, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s11 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 +; 
GFX7-NEXT: s_ashr_i32 s5, s2, 24 +; GFX7-NEXT: s_ashr_i32 s8, s3, 24 +; GFX7-NEXT: s_bfe_i32 s9, s3, 0x80010 +; GFX7-NEXT: s_bfe_i32 s10, s3, 0x80008 +; GFX7-NEXT: s_sext_i32_i8 s3, s3 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: s_bfe_i32 s6, s2, 0x80010 +; GFX7-NEXT: s_bfe_i32 s7, s2, 0x80008 +; GFX7-NEXT: s_sext_i32_i8 s2, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s4 +; GFX7-NEXT: v_mad_i32_i24 v0, s2, v0, v1 ; GFX7-NEXT: v_mov_b32_e32 v1, s10 ; GFX7-NEXT: v_mad_i32_i24 v0, s7, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s9 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: v_mad_i32_i24 v0, s5, v1, v0 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; @@ -2378,39 +2378,39 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 ; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 +; GFX7-NEXT: s_movk_i32 s7, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_and_b32 s11, s4, s8 -; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 -; GFX7-NEXT: s_bfe_u32 s9, s5, 0x80008 -; GFX7-NEXT: s_lshr_b32 s10, s5, 24 -; GFX7-NEXT: s_and_b32 s8, s5, s8 -; GFX7-NEXT: v_mov_b32_e32 v4, s9 -; GFX7-NEXT: s_lshr_b32 s7, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v2, s10 -; GFX7-NEXT: s_bfe_u32 s5, s5, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v3, s8 -; GFX7-NEXT: v_mul_u32_u24_e32 v2, s7, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v4, s6, v4 -; GFX7-NEXT: s_bfe_u32 s4, s4, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mul_u32_u24_e32 v1, s4, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v3, s11, v3 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_or_b32_e32 v2, v3, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX7-NEXT: s_bfe_u32 s2, s4, 0x80008 +; GFX7-NEXT: s_bfe_u32 s3, s5, 0x80008 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: v_mul_u32_u24_e32 v0, s2, v0 +; GFX7-NEXT: s_lshr_b32 s3, s5, 24 +; GFX7-NEXT: s_bfe_u32 s2, s5, 0x80010 +; GFX7-NEXT: v_mov_b32_e32 v1, s3 +; GFX7-NEXT: v_mov_b32_e32 v2, s2 +; GFX7-NEXT: s_bfe_u32 s3, s4, 0x80010 +; GFX7-NEXT: v_mul_u32_u24_e32 v2, s3, v2 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: buffer_load_ushort v3, off, s[0:3], 0 +; GFX7-NEXT: s_and_b32 s5, s5, s7 +; GFX7-NEXT: s_lshr_b32 s6, s4, 24 +; GFX7-NEXT: s_and_b32 s4, s4, s7 +; GFX7-NEXT: v_mov_b32_e32 v4, s5 +; GFX7-NEXT: v_mul_u32_u24_e32 v1, s6, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_mul_u32_u24_e32 v4, s4, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v4, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -2421,29 +2421,29 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: 
s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_movk_i32 s0, 0xff +; GFX8-NEXT: s_movk_i32 s6, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s6, s1, s0 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: v_mov_b32_e32 v5, s0 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 +; GFX8-NEXT: s_bfe_u32 s0, s3, 0x80010 +; GFX8-NEXT: v_lshrrev_b16_e64 v4, 8, s3 +; GFX8-NEXT: s_lshr_b32 s1, s3, 24 +; GFX8-NEXT: s_and_b32 s3, s3, s6 ; GFX8-NEXT: v_lshrrev_b16_e64 v3, 8, s2 -; GFX8-NEXT: v_lshrrev_b16_e64 v4, 8, s1 -; GFX8-NEXT: s_lshr_b32 s4, s2, 24 -; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v6, s7 -; GFX8-NEXT: s_lshr_b32 s3, s1, 24 +; GFX8-NEXT: s_bfe_u32 s4, s2, 0x80010 +; GFX8-NEXT: s_lshr_b32 s5, s2, 24 +; GFX8-NEXT: s_and_b32 s2, s2, s6 +; GFX8-NEXT: v_mov_b32_e32 v5, s3 +; GFX8-NEXT: v_mov_b32_e32 v6, s0 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s6, v5, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, v4, v3, v2 -; GFX8-NEXT: v_mad_u32_u24 v2, s5, v6, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s4 -; GFX8-NEXT: v_mad_u32_u24 v2, s3, v3, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v5, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, v3, v4, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s4, v6, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s1 +; GFX8-NEXT: v_mad_u32_u24 v2, s5, v3, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -2451,35 +2451,35 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, 0xffff +; GFX9-NODL-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 -; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: s_lshr_b32 s5, s2, 16 -; GFX9-NODL-NEXT: s_lshr_b32 s7, s3, 16 -; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 24 -; GFX9-NODL-NEXT: v_and_b32_sdwa v4, v0, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NODL-NEXT: s_lshr_b32 s6, s3, 24 -; GFX9-NODL-NEXT: v_and_b32_sdwa v3, v0, s7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NODL-NEXT: v_lshl_or_b32 v3, s6, 16, v3 -; GFX9-NODL-NEXT: v_lshl_or_b32 v4, s4, 16, v4 -; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v3, v4, v3 -; GFX9-NODL-NEXT: v_and_b32_sdwa v4, v0, s3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v2, 8, s3 -; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v1, 8, s2 -; GFX9-NODL-NEXT: v_and_b32_sdwa v0, v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NODL-NEXT: v_lshl_or_b32 v2, v2, 16, v4 -; GFX9-NODL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v2, v0, v2 ; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NODL-NEXT: global_load_ushort v4, v[0:1], off +; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off +; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NODL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-NODL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-NODL-NEXT: v_and_b32_sdwa v7, v5, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NODL-NEXT: s_lshr_b32 s5, s3, 24 +; GFX9-NODL-NEXT: v_and_b32_sdwa v6, v5, s1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NODL-NEXT: v_lshl_or_b32 v6, s5, 16, v6 +; GFX9-NODL-NEXT: v_lshl_or_b32 v7, s4, 16, v7 +; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v6, v7, v6 +; GFX9-NODL-NEXT: v_and_b32_sdwa v7, v5, s3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v4, 8, s3 +; GFX9-NODL-NEXT: v_lshrrev_b16_e64 v3, 8, s2 +; GFX9-NODL-NEXT: v_and_b32_sdwa v5, v5, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NODL-NEXT: v_lshl_or_b32 v4, v4, 16, v7 +; GFX9-NODL-NEXT: v_lshl_or_b32 v3, v3, 16, v5 +; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v3, v3, v4 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_add_u32_e32 v4, v2, v4 -; GFX9-NODL-NEXT: v_add_u32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NODL-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-NODL-NEXT: v_add_u32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, v2, v6 +; GFX9-NODL-NEXT: v_add_u32_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -2487,35 +2487,35 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, 0xffff +; GFX9-DL-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 -; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_lshr_b32 s5, s2, 16 -; GFX9-DL-NEXT: s_lshr_b32 s7, s3, 16 -; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 24 -; GFX9-DL-NEXT: v_and_b32_sdwa v4, v0, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-DL-NEXT: s_lshr_b32 s6, s3, 24 -; GFX9-DL-NEXT: v_and_b32_sdwa v3, v0, s7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-DL-NEXT: v_lshl_or_b32 v3, s6, 16, v3 -; GFX9-DL-NEXT: v_lshl_or_b32 v4, s4, 16, v4 -; GFX9-DL-NEXT: v_pk_mul_lo_u16 v3, v4, v3 -; GFX9-DL-NEXT: v_and_b32_sdwa v4, v0, s3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-DL-NEXT: v_lshrrev_b16_e64 v2, 8, s3 -; GFX9-DL-NEXT: v_lshrrev_b16_e64 v1, 8, s2 -; GFX9-DL-NEXT: v_and_b32_sdwa v0, v0, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-DL-NEXT: v_lshl_or_b32 v2, v2, 16, v4 -; GFX9-DL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GFX9-DL-NEXT: v_pk_mul_lo_u16 v2, v0, v2 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-DL-NEXT: global_load_ushort v4, v[0:1], off +; GFX9-DL-NEXT: global_load_ushort v2, v[0:1], off +; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 16 +; GFX9-DL-NEXT: s_lshr_b32 s1, s3, 16 +; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 24 +; GFX9-DL-NEXT: v_and_b32_sdwa v7, v5, s0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-DL-NEXT: s_lshr_b32 s5, s3, 24 +; GFX9-DL-NEXT: v_and_b32_sdwa v6, v5, s1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-DL-NEXT: v_lshl_or_b32 v6, s5, 16, v6 +; GFX9-DL-NEXT: v_lshl_or_b32 v7, s4, 16, v7 +; GFX9-DL-NEXT: v_pk_mul_lo_u16 v6, v7, v6 +; GFX9-DL-NEXT: v_and_b32_sdwa v7, v5, s3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-DL-NEXT: v_lshrrev_b16_e64 v4, 8, s3 +; 
GFX9-DL-NEXT: v_lshrrev_b16_e64 v3, 8, s2 +; GFX9-DL-NEXT: v_and_b32_sdwa v5, v5, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-DL-NEXT: v_lshl_or_b32 v4, v4, 16, v7 +; GFX9-DL-NEXT: v_lshl_or_b32 v3, v3, 16, v5 +; GFX9-DL-NEXT: v_pk_mul_lo_u16 v3, v3, v4 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_add_u32_e32 v4, v2, v4 -; GFX9-DL-NEXT: v_add_u32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-DL-NEXT: v_add_u32_e32 v2, v2, v3 +; GFX9-DL-NEXT: v_add_u32_e32 v2, v3, v2 ; GFX9-DL-NEXT: v_add_u32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DL-NEXT: v_add_u32_e32 v2, v2, v6 +; GFX9-DL-NEXT: v_add_u32_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-DL-NEXT: global_store_short v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -2548,34 +2548,34 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_sext_i32_i8 s4, s2 +; GFX7-NEXT: s_sext_i32_i8 s5, s3 +; GFX7-NEXT: s_bfe_i32 s6, s3, 0x80008 +; GFX7-NEXT: s_bfe_i32 s8, s3, 0x80010 +; GFX7-NEXT: s_ashr_i32 s9, s3, 24 +; GFX7-NEXT: s_bfe_i32 s7, s2, 0x80008 +; GFX7-NEXT: s_bfe_i32 s10, s2, 0x80010 +; GFX7-NEXT: s_ashr_i32 s11, s2, 24 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_mov_b32 s8, 0xffff -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[0:3], 0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_sext_i32_i8 s6, s4 -; GFX7-NEXT: s_bfe_i32 s7, s4, 0x80008 -; GFX7-NEXT: s_sext_i32_i8 s10, s5 -; GFX7-NEXT: s_bfe_i32 s11, s5, 0x80008 -; GFX7-NEXT: s_bfe_i32 s12, s5, 0x80010 -; GFX7-NEXT: s_ashr_i32 s5, s5, 24 -; GFX7-NEXT: v_mov_b32_e32 v3, s11 -; GFX7-NEXT: v_mov_b32_e32 v4, s10 -; GFX7-NEXT: s_bfe_i32 s9, s4, 0x80010 -; GFX7-NEXT: v_mov_b32_e32 v2, s12 -; GFX7-NEXT: s_ashr_i32 s4, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_mul_i32_i24_e32 v1, s4, v1 -; GFX7-NEXT: v_mul_i32_i24_e32 v2, s9, v2 +; GFX7-NEXT: v_mov_b32_e32 v1, s9 +; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: v_mov_b32_e32 v3, s6 +; GFX7-NEXT: v_mov_b32_e32 v4, s5 +; GFX7-NEXT: v_mul_i32_i24_e32 v1, s11, v1 ; GFX7-NEXT: v_mul_i32_i24_e32 v3, s7, v3 -; GFX7-NEXT: v_mul_i32_i24_e32 v4, s6, v4 +; GFX7-NEXT: v_mul_i32_i24_e32 v4, s4, v4 +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: v_mul_i32_i24_e32 v2, s10, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s8, v2 +; GFX7-NEXT: v_and_b32_e32 v2, s4, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v4, s8, v4 +; GFX7-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX7-NEXT: v_or_b32_e32 v2, v4, v3 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 @@ -2593,33 +2593,33 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 
0x0 -; GFX8-NEXT: s_mov_b32 s0, 0xffff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: s_bfe_i32 s6, s2, 0x80000 -; GFX8-NEXT: s_lshr_b32 s4, s2, 16 -; GFX8-NEXT: s_bfe_i32 s5, s1, 0x80000 -; GFX8-NEXT: v_ashrrev_i16_e64 v4, 8, s1 +; GFX8-NEXT: s_lshr_b32 s5, s2, 16 +; GFX8-NEXT: s_bfe_i32 s0, s2, 0x80000 +; GFX8-NEXT: v_ashrrev_i16_e64 v4, 8, s2 +; GFX8-NEXT: s_mov_b32 s2, 0xffff ; GFX8-NEXT: s_bfe_i32 s1, s3, 0x80000 -; GFX8-NEXT: v_ashrrev_i16_e64 v6, 8, s3 -; GFX8-NEXT: s_and_b32 s3, s0, s6 -; GFX8-NEXT: v_ashrrev_i16_e64 v3, 8, s2 -; GFX8-NEXT: s_bfe_i32 s2, s4, 0x80000 +; GFX8-NEXT: s_lshr_b32 s4, s3, 16 +; GFX8-NEXT: s_and_b32 s1, s2, s1 +; GFX8-NEXT: s_bfe_i32 s6, s4, 0x80000 +; GFX8-NEXT: s_and_b32 s0, s2, s0 +; GFX8-NEXT: v_mov_b32_e32 v7, s1 +; GFX8-NEXT: v_ashrrev_i16_e64 v3, 8, s3 +; GFX8-NEXT: s_bfe_i32 s7, s5, 0x80000 +; GFX8-NEXT: s_and_b32 s3, s2, s6 +; GFX8-NEXT: s_and_b32 s2, s2, s7 ; GFX8-NEXT: v_ashrrev_i16_e64 v5, 8, s4 -; GFX8-NEXT: s_and_b32 s4, s0, s5 -; GFX8-NEXT: v_mov_b32_e32 v7, s3 -; GFX8-NEXT: s_and_b32 s2, s0, s2 -; GFX8-NEXT: s_and_b32 s0, s0, s1 +; GFX8-NEXT: v_ashrrev_i16_e64 v6, 8, s5 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_u32_u24 v2, s4, v7, v2 +; GFX8-NEXT: v_mad_u32_u24 v2, s0, v7, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, v4, v3, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s2 -; GFX8-NEXT: v_mad_u32_u24 v2, s0, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s3 +; GFX8-NEXT: v_mad_u32_u24 v2, s2, v3, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, v6, v5, v2 ; GFX8-NEXT: flat_store_short v[0:1], v2 ; GFX8-NEXT: s_endpgm @@ -2628,39 +2628,39 @@ ; GFX9-NODL: ; %bb.0: ; %entry ; GFX9-NODL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NODL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NODL-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-NODL-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NODL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-NODL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NODL-NEXT: global_load_ushort v2, v[0:1], off ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NODL-NEXT: s_lshr_b32 s4, s2, 16 ; GFX9-NODL-NEXT: s_lshr_b32 s5, s3, 16 -; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v3, 8, s5 -; GFX9-NODL-NEXT: s_bfe_i32 s5, s5, 0x80000 -; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v2, 8, s4 -; GFX9-NODL-NEXT: v_and_b32_e32 v5, s5, v4 -; GFX9-NODL-NEXT: s_bfe_i32 s4, s4, 0x80000 -; GFX9-NODL-NEXT: v_lshl_or_b32 v3, v3, 16, v5 -; GFX9-NODL-NEXT: v_and_b32_e32 v5, s4, v4 -; GFX9-NODL-NEXT: v_lshl_or_b32 v2, v2, 16, v5 -; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v1, 8, s3 -; GFX9-NODL-NEXT: s_bfe_i32 s3, s3, 0x80000 -; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v0, 8, s2 -; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v2, v2, v3 -; GFX9-NODL-NEXT: v_and_b32_e32 v3, s3, v4 -; GFX9-NODL-NEXT: s_bfe_i32 s2, s2, 0x80000 -; GFX9-NODL-NEXT: v_lshl_or_b32 v1, v1, 16, v3 -; GFX9-NODL-NEXT: v_and_b32_e32 v3, s2, v4 -; GFX9-NODL-NEXT: v_lshl_or_b32 v0, v0, 16, v3 -; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v3, v0, v1 -; GFX9-NODL-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NODL-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NODL-NEXT: global_load_ushort v4, v[0:1], off +; GFX9-NODL-NEXT: s_bfe_i32 s1, s3, 0x80000 +; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v4, 8, s3 +; GFX9-NODL-NEXT: s_bfe_i32 s3, s5, 0x80000 +; GFX9-NODL-NEXT: s_bfe_i32 s0, s2, 0x80000 +; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v3, 8, s2 +; GFX9-NODL-NEXT: s_bfe_i32 s2, s4, 0x80000 +; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v6, 8, s5 +; GFX9-NODL-NEXT: v_and_b32_e32 v8, 
s3, v7 +; GFX9-NODL-NEXT: v_lshl_or_b32 v6, v6, 16, v8 +; GFX9-NODL-NEXT: v_ashrrev_i16_e64 v5, 8, s4 +; GFX9-NODL-NEXT: v_and_b32_e32 v8, s2, v7 +; GFX9-NODL-NEXT: v_lshl_or_b32 v5, v5, 16, v8 +; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v5, v5, v6 +; GFX9-NODL-NEXT: v_and_b32_e32 v6, s1, v7 +; GFX9-NODL-NEXT: v_lshl_or_b32 v4, v4, 16, v6 +; GFX9-NODL-NEXT: v_and_b32_e32 v6, s0, v7 +; GFX9-NODL-NEXT: v_lshl_or_b32 v3, v3, 16, v6 +; GFX9-NODL-NEXT: v_pk_mul_lo_u16 v3, v3, v4 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_add_u32_e32 v4, v3, v4 -; GFX9-NODL-NEXT: v_add_u32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NODL-NEXT: v_add_u32_e32 v3, v3, v2 -; GFX9-NODL-NEXT: v_add_u32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-NODL-NEXT: v_add_u32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NODL-NEXT: v_add_u32_e32 v2, v2, v5 +; GFX9-NODL-NEXT: v_add_u32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-NODL-NEXT: global_store_short v[0:1], v2, off ; GFX9-NODL-NEXT: s_endpgm ; @@ -2668,39 +2668,39 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX9-DL-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 ; GFX9-DL-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: global_load_ushort v2, v[0:1], off ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DL-NEXT: s_lshr_b32 s4, s2, 16 ; GFX9-DL-NEXT: s_lshr_b32 s5, s3, 16 -; GFX9-DL-NEXT: v_ashrrev_i16_e64 v3, 8, s5 -; GFX9-DL-NEXT: s_bfe_i32 s5, s5, 0x80000 -; GFX9-DL-NEXT: v_ashrrev_i16_e64 v2, 8, s4 -; GFX9-DL-NEXT: v_and_b32_e32 v5, s5, v4 -; GFX9-DL-NEXT: s_bfe_i32 s4, s4, 0x80000 -; GFX9-DL-NEXT: v_lshl_or_b32 v3, v3, 16, v5 -; GFX9-DL-NEXT: v_and_b32_e32 v5, s4, v4 -; GFX9-DL-NEXT: v_lshl_or_b32 v2, v2, 16, v5 -; GFX9-DL-NEXT: v_ashrrev_i16_e64 v1, 8, s3 -; GFX9-DL-NEXT: s_bfe_i32 s3, s3, 0x80000 -; GFX9-DL-NEXT: v_ashrrev_i16_e64 v0, 8, s2 -; GFX9-DL-NEXT: v_pk_mul_lo_u16 v2, v2, v3 -; GFX9-DL-NEXT: v_and_b32_e32 v3, s3, v4 -; GFX9-DL-NEXT: s_bfe_i32 s2, s2, 0x80000 -; GFX9-DL-NEXT: v_lshl_or_b32 v1, v1, 16, v3 -; GFX9-DL-NEXT: v_and_b32_e32 v3, s2, v4 -; GFX9-DL-NEXT: v_lshl_or_b32 v0, v0, 16, v3 -; GFX9-DL-NEXT: v_pk_mul_lo_u16 v3, v0, v1 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-DL-NEXT: global_load_ushort v4, v[0:1], off +; GFX9-DL-NEXT: s_bfe_i32 s1, s3, 0x80000 +; GFX9-DL-NEXT: v_ashrrev_i16_e64 v4, 8, s3 +; GFX9-DL-NEXT: s_bfe_i32 s3, s5, 0x80000 +; GFX9-DL-NEXT: s_bfe_i32 s0, s2, 0x80000 +; GFX9-DL-NEXT: v_ashrrev_i16_e64 v3, 8, s2 +; GFX9-DL-NEXT: s_bfe_i32 s2, s4, 0x80000 +; GFX9-DL-NEXT: v_ashrrev_i16_e64 v6, 8, s5 +; GFX9-DL-NEXT: v_and_b32_e32 v8, s3, v7 +; GFX9-DL-NEXT: v_lshl_or_b32 v6, v6, 16, v8 +; GFX9-DL-NEXT: v_ashrrev_i16_e64 v5, 8, s4 +; GFX9-DL-NEXT: v_and_b32_e32 v8, s2, v7 +; GFX9-DL-NEXT: v_lshl_or_b32 v5, v5, 16, v8 +; GFX9-DL-NEXT: v_pk_mul_lo_u16 v5, v5, v6 +; GFX9-DL-NEXT: v_and_b32_e32 v6, s1, v7 +; GFX9-DL-NEXT: v_lshl_or_b32 v4, v4, 16, v6 +; GFX9-DL-NEXT: v_and_b32_e32 v6, s0, v7 +; GFX9-DL-NEXT: v_lshl_or_b32 v3, v3, 16, v6 +; GFX9-DL-NEXT: v_pk_mul_lo_u16 v3, v3, v4 ; GFX9-DL-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_add_u32_e32 v4, v3, v4 -; GFX9-DL-NEXT: v_add_u32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-DL-NEXT: v_add_u32_e32 v3, v3, v2 -; GFX9-DL-NEXT: v_add_u32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DL-NEXT: v_add_u32_e32 v2, v3, v2 +; GFX9-DL-NEXT: v_add_u32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-DL-NEXT: v_add_u32_e32 v2, v2, v5 +; GFX9-DL-NEXT: v_add_u32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-DL-NEXT: global_store_short v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <4 x i8> addrspace(1)* %src2, @@ -2734,33 +2734,33 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s3, s[6:7], 0x0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_bfe_u32 s4, s2, 0x80008 +; GFX7-NEXT: s_bfe_u32 s5, s3, 0x80008 +; GFX7-NEXT: s_lshr_b32 s6, s3, 16 +; GFX7-NEXT: s_lshr_b32 s7, s3, 24 +; GFX7-NEXT: s_mul_i32 s10, s2, s3 +; GFX7-NEXT: s_lshr_b32 s8, s2, 16 +; GFX7-NEXT: s_lshr_b32 s9, s2, 24 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: s_movk_i32 s8, 0xff -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s4, s[4:5], 0x0 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 -; GFX7-NEXT: s_load_dword s5, s[6:7], 0x0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_u32 s6, s4, 0x80008 -; GFX7-NEXT: s_lshr_b32 s7, s4, 16 -; GFX7-NEXT: s_bfe_u32 s10, s5, 0x80008 -; GFX7-NEXT: s_lshr_b32 s11, s5, 16 -; GFX7-NEXT: s_lshr_b32 s12, s5, 24 -; GFX7-NEXT: v_mov_b32_e32 v2, s11 -; GFX7-NEXT: v_mov_b32_e32 v3, s10 -; GFX7-NEXT: s_lshr_b32 s9, s4, 24 -; GFX7-NEXT: v_mov_b32_e32 v1, s12 -; GFX7-NEXT: s_mul_i32 s4, s4, s5 +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: v_mov_b32_e32 v1, s7 +; GFX7-NEXT: v_mov_b32_e32 v3, s5 ; GFX7-NEXT: v_mul_u32_u24_e32 v1, s9, v1 -; GFX7-NEXT: v_mul_u32_u24_e32 v2, s7, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v3, s6, v3 -; GFX7-NEXT: s_and_b32 s4, s4, s8 +; GFX7-NEXT: v_mul_u32_u24_e32 v3, s4, v3 +; GFX7-NEXT: s_movk_i32 s6, 0xff +; GFX7-NEXT: v_mul_u32_u24_e32 v2, s8, v2 +; GFX7-NEXT: s_and_b32 s7, s10, s6 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s8, v2 +; GFX7-NEXT: v_and_b32_e32 v2, s6, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX7-NEXT: v_or_b32_e32 v2, s4, v3 +; GFX7-NEXT: v_or_b32_e32 v2, s7, v3 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 @@ -2780,32 +2780,32 @@ ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s3, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s1, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s3, s1, 24 -; GFX8-NEXT: s_lshr_b32 s4, s2, 24 -; GFX8-NEXT: s_and_b32 s6, s1, s0 -; GFX8-NEXT: s_bfe_u32 s7, s2, 0x80010 -; GFX8-NEXT: s_and_b32 s0, s2, s0 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mov_b32_e32 v4, s2 -; 
GFX8-NEXT: v_mul_u32_u24_sdwa v3, v3, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 -; GFX8-NEXT: v_mov_b32_e32 v4, s0 -; GFX8-NEXT: s_bfe_u32 s5, s1, 0x80010 -; GFX8-NEXT: v_mov_b32_e32 v5, s7 -; GFX8-NEXT: v_mov_b32_e32 v6, s4 +; GFX8-NEXT: s_lshr_b32 s0, s2, 24 +; GFX8-NEXT: s_lshr_b32 s1, s3, 24 +; GFX8-NEXT: s_bfe_u32 s4, s3, 0x80010 +; GFX8-NEXT: v_mov_b32_e32 v4, s1 +; GFX8-NEXT: s_movk_i32 s1, 0xff +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: s_and_b32 s4, s3, s1 +; GFX8-NEXT: v_mov_b32_e32 v6, s2 ; GFX8-NEXT: v_mov_b32_e32 v7, s3 -; GFX8-NEXT: v_mul_u32_u24_e32 v4, s6, v4 -; GFX8-NEXT: v_mul_u32_u24_e32 v5, s5, v5 -; GFX8-NEXT: v_mul_u32_u24_sdwa v6, v7, v6 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v5, v5, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: s_bfe_u32 s5, s2, 0x80010 +; GFX8-NEXT: v_mul_u32_u24_sdwa v6, v6, v7 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 +; GFX8-NEXT: v_mov_b32_e32 v7, s0 +; GFX8-NEXT: s_and_b32 s1, s2, s1 +; GFX8-NEXT: v_mov_b32_e32 v5, s4 +; GFX8-NEXT: v_mul_u32_u24_e32 v3, s5, v3 +; GFX8-NEXT: v_mul_u32_u24_sdwa v4, v7, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_mul_u32_u24_e32 v5, s1, v5 +; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v4, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v3 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 Index: llvm/test/CodeGen/AMDGPU/idot8.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/idot8.ll +++ llvm/test/CodeGen/AMDGPU/idot8.ll @@ -7,38 +7,32 @@ define amdgpu_kernel void @udot8_acc32(<8 x i4> addrspace(1)* %src1, ; GFX7-LABEL: udot8_acc32: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 -; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 -; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s8, s0, 28 -; GFX7-NEXT: s_lshr_b32 s15, s1, 28 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s21, s1, 0x40004 +; GFX7-NEXT: s_lshr_b32 s4, s0, 28 +; GFX7-NEXT: s_lshr_b32 s13, s1, 28 +; GFX7-NEXT: s_bfe_u32 s14, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40014 +; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s17, s1, 0x4000c +; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40008 +; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40004 ; GFX7-NEXT: 
s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s9, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s10, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40018 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s10, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40008 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40004 ; GFX7-NEXT: s_and_b32 s0, s0, 15 ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mad_u32_u24 v0, s0, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s21 -; GFX7-NEXT: v_mad_u32_u24 v0, s14, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s20 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s19 ; GFX7-NEXT: v_mad_u32_u24 v0, s12, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s18 @@ -46,10 +40,16 @@ ; GFX7-NEXT: v_mov_b32_e32 v1, s17 ; GFX7-NEXT: v_mad_u32_u24 v0, s10, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s16 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s15 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s14 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s13 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: udot8_acc32: @@ -237,46 +237,46 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s11, 0xf000 -; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 28 -; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40004 -; GFX7-NEXT: s_lshr_b32 s14, s1, 28 -; GFX7-NEXT: s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s7, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40004 -; GFX7-NEXT: s_and_b32 s0, s0, 15 -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mov_b32_e32 v3, s19 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: v_mov_b32_e32 v7, s15 +; GFX7-NEXT: s_and_b32 s2, s0, 15 +; GFX7-NEXT: s_bfe_u32 s10, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s11, s1, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_load_ushort v6, off, s[8:11], 0 +; GFX7-NEXT: s_and_b32 s4, s1, 15 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_bfe_u32 s5, s1, 0x40004 +; GFX7-NEXT: s_bfe_u32 s6, s1, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s7, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v2, 
s6 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s13, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40018 +; GFX7-NEXT: s_lshr_b32 s1, s1, 28 +; GFX7-NEXT: v_mov_b32_e32 v7, s13 +; GFX7-NEXT: s_lshr_b32 s0, s0, 28 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v0, v6 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v4, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v5, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v6, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s14 -; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s12, v5, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s14, v7, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; @@ -498,46 +498,46 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s11, 0xf000 -; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 ; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 28 -; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40004 -; GFX7-NEXT: s_lshr_b32 s14, s1, 28 -; GFX7-NEXT: s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s7, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40004 -; GFX7-NEXT: s_and_b32 s0, s0, 15 -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mov_b32_e32 v3, s19 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: v_mov_b32_e32 v7, s15 +; GFX7-NEXT: s_and_b32 s2, s0, 15 +; GFX7-NEXT: s_bfe_u32 s10, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s11, s1, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_load_ubyte v6, off, s[8:11], 0 +; GFX7-NEXT: s_and_b32 s4, s1, 15 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_bfe_u32 s5, s1, 0x40004 +; GFX7-NEXT: s_bfe_u32 s6, s1, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s7, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s13, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40018 +; GFX7-NEXT: s_lshr_b32 s1, s1, 28 +; GFX7-NEXT: v_mov_b32_e32 v7, s13 +; GFX7-NEXT: s_lshr_b32 s0, s0, 28 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: 
v_mad_u32_u24 v0, s0, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v0, v6 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v4, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v5, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v6, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s14 -; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s12, v5, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s14, v7, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; @@ -759,46 +759,46 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s11, 0xf000 -; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 ; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 28 -; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40004 -; GFX7-NEXT: s_lshr_b32 s14, s1, 28 -; GFX7-NEXT: s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s7, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40004 -; GFX7-NEXT: s_and_b32 s0, s0, 15 -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mov_b32_e32 v3, s19 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: v_mov_b32_e32 v7, s15 +; GFX7-NEXT: s_and_b32 s2, s0, 15 +; GFX7-NEXT: s_bfe_u32 s10, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s11, s1, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_load_ubyte v6, off, s[8:11], 0 +; GFX7-NEXT: s_and_b32 s4, s1, 15 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_bfe_u32 s5, s1, 0x40004 +; GFX7-NEXT: s_bfe_u32 s6, s1, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s7, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s13, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40018 +; GFX7-NEXT: s_lshr_b32 s1, s1, 28 +; GFX7-NEXT: v_mov_b32_e32 v7, s13 +; GFX7-NEXT: s_lshr_b32 s0, s0, 28 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v0, v6 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v4, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v5, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v6, v0 -; GFX7-NEXT: 
v_mad_u32_u24 v0, s4, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s14 -; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s12, v5, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s14, v7, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -1014,46 +1014,46 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s11, 0xf000 -; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 ; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 28 -; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40004 -; GFX7-NEXT: s_lshr_b32 s14, s1, 28 -; GFX7-NEXT: s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s7, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40004 -; GFX7-NEXT: s_and_b32 s0, s0, 15 -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mov_b32_e32 v3, s19 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: v_mov_b32_e32 v7, s15 +; GFX7-NEXT: s_and_b32 s2, s0, 15 +; GFX7-NEXT: s_bfe_u32 s10, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s11, s1, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_load_ubyte v6, off, s[8:11], 0 +; GFX7-NEXT: s_and_b32 s4, s1, 15 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_bfe_u32 s5, s1, 0x40004 +; GFX7-NEXT: s_bfe_u32 s6, s1, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s7, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s13, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40018 +; GFX7-NEXT: s_lshr_b32 s1, s1, 28 +; GFX7-NEXT: v_mov_b32_e32 v7, s13 +; GFX7-NEXT: s_lshr_b32 s0, s0, 28 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v0, v6 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v4, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v5, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v6, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s14 -; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s12, v5, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s14, v7, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -1256,51 
+1256,51 @@ define amdgpu_kernel void @udot8_multiuses_mul1(<8 x i4> addrspace(1)* %src1, ; GFX7-LABEL: udot8_multiuses_mul1: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 -; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 -; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s8, s0, 28 -; GFX7-NEXT: s_bfe_u32 s21, s1, 0x40004 -; GFX7-NEXT: s_lshr_b32 s15, s1, 28 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40008 +; GFX7-NEXT: s_lshr_b32 s4, s0, 28 +; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40004 +; GFX7-NEXT: s_lshr_b32 s13, s1, 28 +; GFX7-NEXT: s_bfe_u32 s14, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40014 +; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s17, s1, 0x4000c +; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40008 ; GFX7-NEXT: s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s9, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s10, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40018 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s10, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40008 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40004 ; GFX7-NEXT: s_and_b32 s0, s0, 15 ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mad_u32_u24 v1, s0, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v2, s21 -; GFX7-NEXT: v_mad_u32_u24 v0, s0, v0, v1 -; GFX7-NEXT: v_mad_u32_u24 v1, s14, v2, v1 -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mad_u32_u24 v1, s13, v2, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, s19 +; GFX7-NEXT: v_mad_u32_u24 v0, s0, v0, v1 ; GFX7-NEXT: v_mad_u32_u24 v1, s12, v2, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, s18 ; GFX7-NEXT: v_mad_u32_u24 v1, s11, v2, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, s17 ; GFX7-NEXT: v_mad_u32_u24 v1, s10, v2, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, s16 -; GFX7-NEXT: v_mad_u32_u24 v1, s9, v2, v1 +; GFX7-NEXT: v_mad_u32_u24 v1, s7, v2, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, s15 -; GFX7-NEXT: v_mad_u32_u24 v1, s8, v2, v1 +; GFX7-NEXT: v_mad_u32_u24 v1, s6, v2, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, s14 +; GFX7-NEXT: v_mad_u32_u24 v1, s5, v2, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, s13 +; GFX7-NEXT: v_mad_u32_u24 v1, s4, v2, v1 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: udot8_multiuses_mul1: @@ -1522,38 +1522,32 @@ define amdgpu_kernel void @udot8_acc32_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX7-LABEL: udot8_acc32_vecMul: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: 
s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 -; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 -; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s8, s0, 28 -; GFX7-NEXT: s_lshr_b32 s15, s1, 28 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s21, s1, 0x40004 +; GFX7-NEXT: s_lshr_b32 s4, s0, 28 +; GFX7-NEXT: s_lshr_b32 s13, s1, 28 +; GFX7-NEXT: s_bfe_u32 s14, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40014 +; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s17, s1, 0x4000c +; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40008 +; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40004 ; GFX7-NEXT: s_and_b32 s1, s1, 15 -; GFX7-NEXT: s_bfe_u32 s9, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s10, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40018 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s10, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40008 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40004 ; GFX7-NEXT: s_and_b32 s0, s0, 15 ; GFX7-NEXT: v_mov_b32_e32 v0, s1 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 ; GFX7-NEXT: v_mad_u32_u24 v0, s0, v0, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, s21 -; GFX7-NEXT: v_mad_u32_u24 v0, s14, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s20 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s19 ; GFX7-NEXT: v_mad_u32_u24 v0, s12, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s18 @@ -1561,10 +1555,16 @@ ; GFX7-NEXT: v_mov_b32_e32 v1, s17 ; GFX7-NEXT: v_mad_u32_u24 v0, s10, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s16 -; GFX7-NEXT: v_mad_u32_u24 v0, s9, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s7, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s15 -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s14 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s13 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: udot8_acc32_vecMul: @@ -1717,61 +1717,61 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 -; GFX7-NEXT: buffer_load_ushort v0, off, s[4:7], 0 ; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40004 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x40004 -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x4000c -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 -; GFX7-NEXT: s_bfe_u32 s14, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40010 -; GFX7-NEXT: s_lshr_b32 s16, s1, 28 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40018 -; GFX7-NEXT: 
s_and_b32 s19, s1, 15 -; GFX7-NEXT: s_bfe_u32 s1, s1, 0x40008 -; GFX7-NEXT: v_mul_u32_u24_e32 v2, s13, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v4, s11, v4 -; GFX7-NEXT: s_bfe_u32 s2, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s8, s0, 0x40010 -; GFX7-NEXT: s_lshr_b32 s9, s0, 28 -; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: s_bfe_u32 s10, s0, 0x40018 -; GFX7-NEXT: s_and_b32 s12, s0, 15 -; GFX7-NEXT: v_mov_b32_e32 v3, s19 -; GFX7-NEXT: s_bfe_u32 s0, s0, 0x40008 -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mul_u32_u24_e32 v6, s9, v6 -; GFX7-NEXT: v_mul_u32_u24_e32 v1, s0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v3, s12, v3 +; GFX7-NEXT: s_bfe_u32 s8, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s7, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v0, s7 +; GFX7-NEXT: s_bfe_u32 s6, s1, 0x40004 +; GFX7-NEXT: s_and_b32 s9, s1, 15 +; GFX7-NEXT: s_bfe_u32 s10, s1, 0x40008 +; GFX7-NEXT: v_mul_u32_u24_e32 v0, s8, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s6 +; GFX7-NEXT: v_mov_b32_e32 v2, s10 +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v3, s9 +; GFX7-NEXT: s_and_b32 s6, s0, 15 +; GFX7-NEXT: v_mul_u32_u24_e32 v2, s7, v2 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_mul_u32_u24_e32 v3, s6, v3 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX7-NEXT: buffer_load_ushort v2, off, s[4:7], 0 +; GFX7-NEXT: s_bfe_u32 s2, s0, 0x40004 +; GFX7-NEXT: v_mul_u32_u24_e32 v1, s2, v1 +; GFX7-NEXT: s_bfe_u32 s2, s1, 0x40014 +; GFX7-NEXT: s_lshr_b32 s11, s1, 28 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX7-NEXT: s_bfe_u32 s9, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s8, s0, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v6, s2 +; GFX7-NEXT: s_lshr_b32 s12, s0, 28 +; GFX7-NEXT: v_mov_b32_e32 v4, s11 +; GFX7-NEXT: s_bfe_u32 s1, s1, 0x40018 +; GFX7-NEXT: v_mul_u32_u24_e32 v4, s12, v4 +; GFX7-NEXT: v_mul_u32_u24_e32 v6, s8, v6 +; GFX7-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX7-NEXT: s_bfe_u32 s10, s0, 0x40010 +; GFX7-NEXT: v_mov_b32_e32 v5, s9 +; GFX7-NEXT: s_bfe_u32 s0, s0, 0x40018 +; GFX7-NEXT: v_mov_b32_e32 v3, s1 +; GFX7-NEXT: v_mul_u32_u24_e32 v3, s0, v3 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 -; GFX7-NEXT: v_or_b32_e32 v2, v3, v4 ; GFX7-NEXT: v_mul_u32_u24_e32 v5, s10, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX7-NEXT: v_mov_b32_e32 v8, s14 -; GFX7-NEXT: v_or_b32_e32 v3, v5, v6 -; GFX7-NEXT: v_alignbit_b32 v5, v1, v2, 16 -; GFX7-NEXT: v_mov_b32_e32 v7, s15 -; GFX7-NEXT: v_mul_u32_u24_e32 v8, s2, v8 -; GFX7-NEXT: v_mul_u32_u24_e32 v7, s8, v7 -; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v4, v7, v8 +; GFX7-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX7-NEXT: v_or_b32_e32 v4, v5, v6 +; GFX7-NEXT: v_alignbit_b32 v5, v0, v1, 16 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v4 ; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v3 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v6, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v4, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v7, v0 @@ -1980,47 +1980,47 @@ ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[8:9], 
0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 ; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 +; GFX7-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_bfe_u32 s2, s0, 0x4000c -; GFX7-NEXT: s_lshr_b32 s11, s0, 28 ; GFX7-NEXT: s_bfe_u32 s14, s1, 0x4000c ; GFX7-NEXT: s_lshr_b32 s18, s1, 28 ; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40014 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 ; GFX7-NEXT: v_mov_b32_e32 v8, s14 ; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40008 ; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40004 ; GFX7-NEXT: s_and_b32 s17, s1, 15 ; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40018 ; GFX7-NEXT: s_bfe_u32 s1, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40014 +; GFX7-NEXT: s_lshr_b32 s8, s0, 28 +; GFX7-NEXT: v_mov_b32_e32 v4, s18 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40014 ; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mul_u32_u24_e32 v2, s13, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v4, s11, v4 +; GFX7-NEXT: v_mul_u32_u24_e32 v2, s12, v2 +; GFX7-NEXT: v_mul_u32_u24_e32 v4, s8, v4 ; GFX7-NEXT: v_mul_u32_u24_e32 v8, s2, v8 -; GFX7-NEXT: s_bfe_u32 s8, s0, 0x40008 +; GFX7-NEXT: s_bfe_u32 s9, s0, 0x40008 ; GFX7-NEXT: v_mov_b32_e32 v7, s15 -; GFX7-NEXT: s_bfe_u32 s9, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s10, s0, 0x40004 ; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: s_and_b32 s10, s0, 15 -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40018 +; GFX7-NEXT: s_bfe_u32 s11, s0, 0x40018 ; GFX7-NEXT: v_mov_b32_e32 v3, s19 +; GFX7-NEXT: s_and_b32 s13, s0, 15 ; GFX7-NEXT: s_bfe_u32 s0, s0, 0x40010 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mul_u32_u24_e32 v6, s9, v6 +; GFX7-NEXT: v_mul_u32_u24_e32 v6, s10, v6 ; GFX7-NEXT: v_mul_u32_u24_e32 v1, s0, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GFX7-NEXT: v_mul_u32_u24_e32 v3, s12, v3 -; GFX7-NEXT: v_mul_u32_u24_e32 v7, s8, v7 +; GFX7-NEXT: v_mul_u32_u24_e32 v3, s11, v3 +; GFX7-NEXT: v_mul_u32_u24_e32 v7, s9, v7 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8 ; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX7-NEXT: v_or_b32_e32 v2, v3, v4 ; GFX7-NEXT: v_or_b32_e32 v4, v7, v8 -; GFX7-NEXT: v_mul_u32_u24_e32 v5, s10, v5 +; GFX7-NEXT: v_mul_u32_u24_e32 v5, s13, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX7-NEXT: v_or_b32_e32 v3, v5, v6 @@ -2056,49 +2056,49 @@ ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_bfe_u32 s0, s2, 0x40004 ; GFX8-NEXT: s_bfe_u32 s1, s2, 0x4000c +; GFX8-NEXT: s_bfe_u32 s7, s4, 0x4000c ; GFX8-NEXT: s_bfe_u32 s5, s4, 0x40004 ; GFX8-NEXT: s_and_b32 s6, s4, 15 -; GFX8-NEXT: s_bfe_u32 s7, s4, 0x4000c ; GFX8-NEXT: s_bfe_u32 s8, s4, 0x40008 +; GFX8-NEXT: v_mov_b32_e32 v4, s7 +; GFX8-NEXT: v_mov_b32_e32 v11, s1 +; GFX8-NEXT: s_bfe_u32 s0, s2, 0x40004 ; GFX8-NEXT: v_mov_b32_e32 v3, s8 ; GFX8-NEXT: s_bfe_u32 s10, s2, 0x40008 -; GFX8-NEXT: v_mov_b32_e32 v4, s7 -; GFX8-NEXT: v_mov_b32_e32 v5, s1 -; GFX8-NEXT: v_mov_b32_e32 v6, s6 -; GFX8-NEXT: v_mov_b32_e32 v7, s5 -; GFX8-NEXT: v_mov_b32_e32 v8, s0 +; GFX8-NEXT: v_mov_b32_e32 v5, s6 ; GFX8-NEXT: s_and_b32 s9, s2, 15 -; GFX8-NEXT: v_mul_u32_u24_sdwa v4, v5, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v6, s5 +; GFX8-NEXT: v_mul_u32_u24_sdwa v4, v11, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v11, s0 ; GFX8-NEXT: v_mul_u32_u24_e32 v3, s10, v3 -; GFX8-NEXT: v_mul_u32_u24_e32 v5, s9, v6 -; GFX8-NEXT: 
v_mul_u32_u24_sdwa v6, v8, v7 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_mul_u32_u24_e32 v5, s9, v5 +; GFX8-NEXT: v_mul_u32_u24_sdwa v6, v11, v6 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: s_lshr_b32 s6, s2, 28 +; GFX8-NEXT: s_lshr_b32 s9, s4, 28 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v6 ; GFX8-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: s_bfe_u32 s0, s2, 0x40014 -; GFX8-NEXT: s_lshr_b32 s1, s2, 28 -; GFX8-NEXT: s_bfe_u32 s5, s4, 0x40014 -; GFX8-NEXT: s_bfe_u32 s6, s4, 0x40010 -; GFX8-NEXT: s_lshr_b32 s7, s4, 28 +; GFX8-NEXT: s_bfe_u32 s7, s4, 0x40014 +; GFX8-NEXT: s_bfe_u32 s8, s4, 0x40010 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x40018 -; GFX8-NEXT: s_bfe_u32 s8, s2, 0x40010 +; GFX8-NEXT: s_bfe_u32 s5, s2, 0x40014 +; GFX8-NEXT: v_mov_b32_e32 v8, s9 +; GFX8-NEXT: v_mov_b32_e32 v11, s6 +; GFX8-NEXT: s_bfe_u32 s10, s2, 0x40010 +; GFX8-NEXT: v_mov_b32_e32 v9, s8 +; GFX8-NEXT: v_mul_u32_u24_sdwa v8, v11, v8 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x40018 -; GFX8-NEXT: v_mov_b32_e32 v6, s4 -; GFX8-NEXT: v_mov_b32_e32 v7, s7 -; GFX8-NEXT: v_mov_b32_e32 v8, s1 -; GFX8-NEXT: v_mov_b32_e32 v9, s6 -; GFX8-NEXT: v_mov_b32_e32 v10, s5 -; GFX8-NEXT: v_mov_b32_e32 v11, s0 +; GFX8-NEXT: v_mov_b32_e32 v7, s4 +; GFX8-NEXT: v_mov_b32_e32 v10, s7 +; GFX8-NEXT: v_mov_b32_e32 v11, s5 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v3 -; GFX8-NEXT: v_mul_u32_u24_sdwa v7, v8, v7 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_mul_u32_u24_e32 v6, s2, v6 -; GFX8-NEXT: v_mul_u32_u24_e32 v8, s8, v9 -; GFX8-NEXT: v_mul_u32_u24_sdwa v9, v11, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v8, v8, v9 -; GFX8-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_sdwa v4, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_mul_u32_u24_e32 v7, s2, v7 +; GFX8-NEXT: v_mul_u32_u24_e32 v9, s10, v9 +; GFX8-NEXT: v_mul_u32_u24_sdwa v10, v11, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v9, v9, v10 +; GFX8-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v4, v9, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 8, v4 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v3 @@ -2276,46 +2276,46 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s11, 0xf000 -; GFX7-NEXT: s_mov_b32 s10, -1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX7-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 ; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshr_b32 s2, s0, 28 -; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40018 -; GFX7-NEXT: s_bfe_u32 s15, s1, 0x40018 -; GFX7-NEXT: s_bfe_u32 s16, s1, 0x40014 -; GFX7-NEXT: s_bfe_u32 s17, s1, 0x40010 -; GFX7-NEXT: s_bfe_u32 s18, s1, 0x4000c -; GFX7-NEXT: s_bfe_u32 s19, s1, 0x40008 -; GFX7-NEXT: s_bfe_u32 s20, s1, 0x40004 -; GFX7-NEXT: s_lshr_b32 s14, s1, 28 -; GFX7-NEXT: s_and_b32 s1, 
s1, 15 -; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40014 -; GFX7-NEXT: s_bfe_u32 s6, s0, 0x40010 -; GFX7-NEXT: s_bfe_u32 s7, s0, 0x4000c -; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40008 -; GFX7-NEXT: s_bfe_u32 s13, s0, 0x40004 -; GFX7-NEXT: s_and_b32 s0, s0, 15 -; GFX7-NEXT: v_mov_b32_e32 v1, s1 -; GFX7-NEXT: v_mov_b32_e32 v2, s20 -; GFX7-NEXT: v_mov_b32_e32 v3, s19 -; GFX7-NEXT: v_mov_b32_e32 v4, s18 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: v_mov_b32_e32 v6, s16 -; GFX7-NEXT: v_mov_b32_e32 v7, s15 +; GFX7-NEXT: s_and_b32 s2, s0, 15 +; GFX7-NEXT: s_bfe_u32 s10, s1, 0x40010 +; GFX7-NEXT: s_bfe_u32 s11, s1, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v4, s10 +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_load_ubyte v6, off, s[8:11], 0 +; GFX7-NEXT: s_and_b32 s4, s1, 15 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_bfe_u32 s5, s1, 0x40004 +; GFX7-NEXT: s_bfe_u32 s6, s1, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_bfe_u32 s4, s0, 0x40004 +; GFX7-NEXT: s_bfe_u32 s7, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v2, s6 +; GFX7-NEXT: s_bfe_u32 s5, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v3, s7 +; GFX7-NEXT: s_bfe_u32 s6, s0, 0x4000c +; GFX7-NEXT: s_bfe_u32 s7, s0, 0x40010 +; GFX7-NEXT: s_bfe_u32 s13, s1, 0x40018 +; GFX7-NEXT: s_bfe_u32 s12, s0, 0x40014 +; GFX7-NEXT: s_bfe_u32 s14, s0, 0x40018 +; GFX7-NEXT: s_lshr_b32 s1, s1, 28 +; GFX7-NEXT: v_mov_b32_e32 v7, s13 +; GFX7-NEXT: s_lshr_b32 s0, s0, 28 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s13, v2, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v0, v6 +; GFX7-NEXT: v_mad_u32_u24 v0, s4, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s5, v2, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s6, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s7, v4, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s6, v5, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s5, v6, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, s4, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s14 -; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s12, v5, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s14, v7, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm @@ -2505,49 +2505,49 @@ define amdgpu_kernel void @idot8_acc32(<8 x i4> addrspace(1)* %src1, ; GFX7-LABEL: idot8_acc32: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 -; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 -; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_i32 s8, s0, 0x40000 -; GFX7-NEXT: s_bfe_i32 s9, s1, 0x40000 -; GFX7-NEXT: s_bfe_i32 s11, s1, 0x40004 -; GFX7-NEXT: v_mov_b32_e32 v0, s9 +; GFX7-NEXT: s_bfe_i32 s4, s0, 0x40000 +; GFX7-NEXT: s_bfe_i32 s5, s1, 0x40000 +; GFX7-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v0, v1 -; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40004 +; GFX7-NEXT: v_mad_i32_i24 v0, s4, 
v0, v1 +; GFX7-NEXT: s_bfe_i32 s6, s0, 0x40004 +; GFX7-NEXT: v_mov_b32_e32 v1, s7 +; GFX7-NEXT: s_bfe_i32 s11, s1, 0x40008 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s11 -; GFX7-NEXT: s_bfe_i32 s13, s1, 0x40008 +; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s13, s1, 0x4000c ; GFX7-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX7-NEXT: s_bfe_i32 s12, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s12, s0, 0x4000c ; GFX7-NEXT: v_mov_b32_e32 v1, s13 -; GFX7-NEXT: s_bfe_i32 s15, s1, 0x4000c +; GFX7-NEXT: s_bfe_i32 s15, s1, 0x40010 ; GFX7-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX7-NEXT: s_bfe_i32 s14, s0, 0x4000c +; GFX7-NEXT: s_bfe_i32 s14, s0, 0x40010 ; GFX7-NEXT: v_mov_b32_e32 v1, s15 -; GFX7-NEXT: s_bfe_i32 s17, s1, 0x40010 +; GFX7-NEXT: s_bfe_i32 s17, s1, 0x40014 +; GFX7-NEXT: s_bfe_i32 s19, s1, 0x40018 ; GFX7-NEXT: v_mad_i32_i24 v0, s14, v1, v0 -; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40010 +; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40014 ; GFX7-NEXT: v_mov_b32_e32 v1, s17 -; GFX7-NEXT: s_bfe_i32 s19, s1, 0x40014 -; GFX7-NEXT: s_bfe_i32 s21, s1, 0x40018 +; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40018 ; GFX7-NEXT: v_mad_i32_i24 v0, s16, v1, v0 -; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40014 ; GFX7-NEXT: v_mov_b32_e32 v1, s19 -; GFX7-NEXT: s_bfe_i32 s20, s0, 0x40018 -; GFX7-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s21 ; GFX7-NEXT: s_ashr_i32 s1, s1, 28 -; GFX7-NEXT: v_mad_i32_i24 v0, s20, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s18, v1, v0 ; GFX7-NEXT: s_ashr_i32 s0, s0, 28 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mad_i32_i24 v0, s0, v1, v0 -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: idot8_acc32: @@ -2737,60 +2737,60 @@ ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s6, -1 -; GFX7-NEXT: s_mov_b32 s0, 0xffff +; GFX7-NEXT: s_mov_b32 s21, 0xffff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s1, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 ; GFX7-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GFX7-NEXT: s_load_dword s2, s[10:11], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_bfe_i32 s2, s0, 0x40000 ; GFX7-NEXT: s_bfe_i32 s8, s1, 0x40000 -; GFX7-NEXT: s_bfe_i32 s10, s1, 0x40004 -; GFX7-NEXT: s_bfe_i32 s9, s2, 0x40000 -; GFX7-NEXT: s_bfe_i32 s11, s2, 0x40004 -; GFX7-NEXT: s_and_b32 s9, s9, s0 -; GFX7-NEXT: s_bfe_i32 s13, s2, 0x40008 -; GFX7-NEXT: s_and_b32 s11, s11, s0 -; GFX7-NEXT: s_and_b32 s8, s8, s0 -; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: s_bfe_i32 s12, s1, 0x40008 -; GFX7-NEXT: s_bfe_i32 s15, s2, 0x4000c -; GFX7-NEXT: s_and_b32 s13, s13, s0 -; GFX7-NEXT: s_and_b32 s10, s10, s0 -; GFX7-NEXT: v_mov_b32_e32 v2, s11 -; GFX7-NEXT: s_bfe_i32 s14, s1, 0x4000c -; GFX7-NEXT: s_bfe_i32 s17, s2, 0x40010 -; GFX7-NEXT: s_and_b32 s15, s15, s0 -; GFX7-NEXT: s_and_b32 s12, s12, s0 -; GFX7-NEXT: v_mov_b32_e32 v3, s13 -; GFX7-NEXT: s_bfe_i32 s16, s1, 0x40010 -; GFX7-NEXT: s_bfe_i32 s19, s2, 0x40014 -; GFX7-NEXT: s_and_b32 s17, s17, s0 -; GFX7-NEXT: s_and_b32 s14, s14, s0 -; GFX7-NEXT: v_mov_b32_e32 v4, s15 -; GFX7-NEXT: s_bfe_i32 s21, s2, 0x40018 -; GFX7-NEXT: s_bfe_i32 s18, s1, 0x40014 -; GFX7-NEXT: s_and_b32 s19, s19, s0 -; GFX7-NEXT: s_and_b32 s16, s16, s0 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: s_bfe_i32 s20, s1, 0x40018 -; GFX7-NEXT: s_ashr_i32 s2, 
s2, 28 -; GFX7-NEXT: s_and_b32 s21, s21, s0 -; GFX7-NEXT: s_and_b32 s18, s18, s0 -; GFX7-NEXT: v_mov_b32_e32 v6, s19 +; GFX7-NEXT: s_bfe_i32 s9, s1, 0x40004 +; GFX7-NEXT: s_and_b32 s8, s8, s21 +; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40004 +; GFX7-NEXT: s_bfe_i32 s11, s1, 0x40008 +; GFX7-NEXT: s_and_b32 s9, s9, s21 +; GFX7-NEXT: s_and_b32 s2, s2, s21 +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: s_bfe_i32 s12, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s13, s1, 0x4000c +; GFX7-NEXT: s_and_b32 s11, s11, s21 +; GFX7-NEXT: s_and_b32 s10, s10, s21 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 +; GFX7-NEXT: s_bfe_i32 s14, s0, 0x4000c +; GFX7-NEXT: s_bfe_i32 s15, s1, 0x40010 +; GFX7-NEXT: s_and_b32 s13, s13, s21 +; GFX7-NEXT: s_and_b32 s12, s12, s21 +; GFX7-NEXT: v_mov_b32_e32 v3, s11 +; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40010 +; GFX7-NEXT: s_bfe_i32 s17, s1, 0x40014 +; GFX7-NEXT: s_and_b32 s15, s15, s21 +; GFX7-NEXT: s_and_b32 s14, s14, s21 +; GFX7-NEXT: v_mov_b32_e32 v4, s13 +; GFX7-NEXT: s_bfe_i32 s19, s1, 0x40018 +; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40014 +; GFX7-NEXT: s_and_b32 s17, s17, s21 +; GFX7-NEXT: s_and_b32 s16, s16, s21 +; GFX7-NEXT: v_mov_b32_e32 v5, s15 +; GFX7-NEXT: s_bfe_i32 s20, s0, 0x40018 ; GFX7-NEXT: s_ashr_i32 s1, s1, 28 -; GFX7-NEXT: s_and_b32 s20, s20, s0 -; GFX7-NEXT: s_and_b32 s2, s2, s0 -; GFX7-NEXT: v_mov_b32_e32 v7, s21 -; GFX7-NEXT: s_and_b32 s0, s1, s0 +; GFX7-NEXT: s_and_b32 s19, s19, s21 +; GFX7-NEXT: s_and_b32 s18, s18, s21 +; GFX7-NEXT: v_mov_b32_e32 v6, s17 +; GFX7-NEXT: s_ashr_i32 s0, s0, 28 +; GFX7-NEXT: s_and_b32 s20, s20, s21 +; GFX7-NEXT: s_and_b32 s1, s1, s21 +; GFX7-NEXT: v_mov_b32_e32 v7, s19 +; GFX7-NEXT: s_and_b32 s0, s0, s21 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s10, v2, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s14, v4, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s16, v5, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s18, v6, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s20, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -3050,60 +3050,60 @@ ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s6, -1 -; GFX7-NEXT: s_movk_i32 s0, 0xff +; GFX7-NEXT: s_movk_i32 s21, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s1, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 -; GFX7-NEXT: s_load_dword s2, s[10:11], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_bfe_i32 s2, s0, 0x40000 ; GFX7-NEXT: s_bfe_i32 s8, s1, 0x40000 -; GFX7-NEXT: s_bfe_i32 s10, s1, 0x40004 -; GFX7-NEXT: s_bfe_i32 s9, s2, 0x40000 -; GFX7-NEXT: s_bfe_i32 s11, s2, 0x40004 -; GFX7-NEXT: s_and_b32 s9, s9, s0 -; GFX7-NEXT: s_bfe_i32 s13, s2, 0x40008 -; GFX7-NEXT: s_and_b32 s11, s11, s0 -; GFX7-NEXT: s_and_b32 s8, s8, s0 -; GFX7-NEXT: v_mov_b32_e32 v1, s9 -; GFX7-NEXT: s_bfe_i32 s12, s1, 0x40008 -; GFX7-NEXT: s_bfe_i32 s15, s2, 0x4000c -; GFX7-NEXT: s_and_b32 s13, s13, s0 -; GFX7-NEXT: s_and_b32 s10, s10, s0 -; GFX7-NEXT: v_mov_b32_e32 v2, s11 -; GFX7-NEXT: s_bfe_i32 s14, s1, 0x4000c -; GFX7-NEXT: s_bfe_i32 s17, s2, 0x40010 -; GFX7-NEXT: s_and_b32 s15, s15, s0 -; GFX7-NEXT: s_and_b32 s12, s12, s0 -; GFX7-NEXT: v_mov_b32_e32 v3, s13 -; GFX7-NEXT: s_bfe_i32 s16, s1, 0x40010 -; GFX7-NEXT: 
s_bfe_i32 s19, s2, 0x40014 -; GFX7-NEXT: s_and_b32 s17, s17, s0 -; GFX7-NEXT: s_and_b32 s14, s14, s0 -; GFX7-NEXT: v_mov_b32_e32 v4, s15 -; GFX7-NEXT: s_bfe_i32 s21, s2, 0x40018 -; GFX7-NEXT: s_bfe_i32 s18, s1, 0x40014 -; GFX7-NEXT: s_and_b32 s19, s19, s0 -; GFX7-NEXT: s_and_b32 s16, s16, s0 -; GFX7-NEXT: v_mov_b32_e32 v5, s17 -; GFX7-NEXT: s_bfe_i32 s20, s1, 0x40018 -; GFX7-NEXT: s_ashr_i32 s2, s2, 28 -; GFX7-NEXT: s_and_b32 s21, s21, s0 -; GFX7-NEXT: s_and_b32 s18, s18, s0 -; GFX7-NEXT: v_mov_b32_e32 v6, s19 +; GFX7-NEXT: s_bfe_i32 s9, s1, 0x40004 +; GFX7-NEXT: s_and_b32 s8, s8, s21 +; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40004 +; GFX7-NEXT: s_bfe_i32 s11, s1, 0x40008 +; GFX7-NEXT: s_and_b32 s9, s9, s21 +; GFX7-NEXT: s_and_b32 s2, s2, s21 +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: s_bfe_i32 s12, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s13, s1, 0x4000c +; GFX7-NEXT: s_and_b32 s11, s11, s21 +; GFX7-NEXT: s_and_b32 s10, s10, s21 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 +; GFX7-NEXT: s_bfe_i32 s14, s0, 0x4000c +; GFX7-NEXT: s_bfe_i32 s15, s1, 0x40010 +; GFX7-NEXT: s_and_b32 s13, s13, s21 +; GFX7-NEXT: s_and_b32 s12, s12, s21 +; GFX7-NEXT: v_mov_b32_e32 v3, s11 +; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40010 +; GFX7-NEXT: s_bfe_i32 s17, s1, 0x40014 +; GFX7-NEXT: s_and_b32 s15, s15, s21 +; GFX7-NEXT: s_and_b32 s14, s14, s21 +; GFX7-NEXT: v_mov_b32_e32 v4, s13 +; GFX7-NEXT: s_bfe_i32 s19, s1, 0x40018 +; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40014 +; GFX7-NEXT: s_and_b32 s17, s17, s21 +; GFX7-NEXT: s_and_b32 s16, s16, s21 +; GFX7-NEXT: v_mov_b32_e32 v5, s15 +; GFX7-NEXT: s_bfe_i32 s20, s0, 0x40018 ; GFX7-NEXT: s_ashr_i32 s1, s1, 28 -; GFX7-NEXT: s_and_b32 s20, s20, s0 -; GFX7-NEXT: s_and_b32 s2, s2, s0 -; GFX7-NEXT: v_mov_b32_e32 v7, s21 -; GFX7-NEXT: s_and_b32 s0, s1, s0 +; GFX7-NEXT: s_and_b32 s19, s19, s21 +; GFX7-NEXT: s_and_b32 s18, s18, s21 +; GFX7-NEXT: v_mov_b32_e32 v6, s17 +; GFX7-NEXT: s_ashr_i32 s0, s0, 28 +; GFX7-NEXT: s_and_b32 s20, s20, s21 +; GFX7-NEXT: s_and_b32 s1, s1, s21 +; GFX7-NEXT: v_mov_b32_e32 v7, s19 +; GFX7-NEXT: s_and_b32 s0, s0, s21 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mad_u32_u24 v0, s8, v1, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, s2, v1, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s10, v2, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s12, v3, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s14, v4, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s16, v5, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s18, v6, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, s20, v7, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, s2 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 ; GFX7-NEXT: v_mad_u32_u24 v0, s0, v1, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -3112,67 +3112,67 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_movk_i32 s2, 0xff ; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s4, s[6:7], 0x0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ubyte v2, v[0:1] -; GFX8-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX8-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s7, s0, 4 -; GFX8-NEXT: s_lshr_b32 s11, s1, 4 -; GFX8-NEXT: v_lshlrev_b16_e64 v3, 12, s7 -; GFX8-NEXT: v_lshlrev_b16_e64 v4, 12, s11 -; GFX8-NEXT: s_bfe_i32 s13, s1, 0x40000 +; GFX8-NEXT: s_lshr_b32 s0, s2, 4 +; GFX8-NEXT: s_lshr_b32 s1, s4, 4 +; GFX8-NEXT: v_lshlrev_b16_e64 v3, 12, s0 +; GFX8-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX8-NEXT: s_bfe_i32 s5, s4, 0x40000 
+; GFX8-NEXT: s_lshr_b32 s0, s2, 12 +; GFX8-NEXT: v_lshlrev_b16_e64 v6, 12, s0 +; GFX8-NEXT: s_lshr_b32 s1, s4, 12 +; GFX8-NEXT: s_bfe_i32 s7, s4, 0x40008 +; GFX8-NEXT: s_bfe_i32 s6, s2, 0x40000 +; GFX8-NEXT: v_mov_b32_e32 v12, s5 ; GFX8-NEXT: v_ashrrev_i16_e32 v3, 12, v3 +; GFX8-NEXT: s_movk_i32 s0, 0xff ; GFX8-NEXT: v_ashrrev_i16_e32 v4, 12, v4 -; GFX8-NEXT: s_lshr_b32 s6, s0, 12 -; GFX8-NEXT: s_lshr_b32 s10, s1, 12 -; GFX8-NEXT: s_bfe_i32 s15, s1, 0x40008 -; GFX8-NEXT: s_bfe_i32 s12, s0, 0x40000 -; GFX8-NEXT: v_mov_b32_e32 v12, s13 -; GFX8-NEXT: v_lshlrev_b16_e64 v6, 12, s6 -; GFX8-NEXT: v_lshlrev_b16_e64 v7, 12, s10 -; GFX8-NEXT: s_bfe_i32 s14, s0, 0x40008 -; GFX8-NEXT: v_mov_b32_e32 v5, s15 -; GFX8-NEXT: v_and_b32_e32 v3, s2, v3 -; GFX8-NEXT: v_and_b32_e32 v4, s2, v4 +; GFX8-NEXT: v_lshlrev_b16_e64 v7, 12, s1 +; GFX8-NEXT: s_bfe_i32 s8, s2, 0x40008 +; GFX8-NEXT: v_mov_b32_e32 v5, s7 +; GFX8-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX8-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX8-NEXT: v_ashrrev_i16_e32 v6, 12, v6 ; GFX8-NEXT: v_ashrrev_i16_e32 v7, 12, v7 -; GFX8-NEXT: s_lshr_b32 s5, s0, 20 -; GFX8-NEXT: s_lshr_b32 s9, s1, 20 -; GFX8-NEXT: v_mul_i32_i24_e32 v5, s14, v5 -; GFX8-NEXT: v_lshlrev_b16_e64 v8, 12, s5 -; GFX8-NEXT: v_lshlrev_b16_e64 v9, 12, s9 -; GFX8-NEXT: s_bfe_i32 s17, s1, 0x40010 -; GFX8-NEXT: v_and_b32_e32 v6, s2, v6 -; GFX8-NEXT: v_and_b32_e32 v7, s2, v7 -; GFX8-NEXT: s_lshr_b32 s8, s1, 28 +; GFX8-NEXT: s_lshr_b32 s9, s2, 20 +; GFX8-NEXT: s_lshr_b32 s10, s4, 20 +; GFX8-NEXT: v_mul_i32_i24_e32 v5, s8, v5 +; GFX8-NEXT: v_lshlrev_b16_e64 v8, 12, s9 +; GFX8-NEXT: v_lshlrev_b16_e64 v9, 12, s10 +; GFX8-NEXT: s_bfe_i32 s11, s4, 0x40010 +; GFX8-NEXT: v_and_b32_e32 v6, s0, v6 +; GFX8-NEXT: v_and_b32_e32 v7, s0, v7 +; GFX8-NEXT: s_lshr_b32 s13, s4, 28 ; GFX8-NEXT: v_ashrrev_i16_e32 v8, 12, v8 ; GFX8-NEXT: v_ashrrev_i16_e32 v9, 12, v9 -; GFX8-NEXT: s_lshr_b32 s4, s0, 28 -; GFX8-NEXT: s_bfe_i32 s16, s0, 0x40010 -; GFX8-NEXT: v_mov_b32_e32 v13, s17 -; GFX8-NEXT: v_lshlrev_b16_e64 v10, 12, s4 -; GFX8-NEXT: v_lshlrev_b16_e64 v11, 12, s8 -; GFX8-NEXT: s_bfe_i32 s1, s1, 0x40018 -; GFX8-NEXT: v_and_b32_e32 v8, s2, v8 -; GFX8-NEXT: v_and_b32_e32 v9, s2, v9 +; GFX8-NEXT: s_lshr_b32 s12, s2, 28 +; GFX8-NEXT: s_bfe_i32 s14, s2, 0x40010 +; GFX8-NEXT: v_mov_b32_e32 v13, s11 +; GFX8-NEXT: v_lshlrev_b16_e64 v10, 12, s12 +; GFX8-NEXT: v_lshlrev_b16_e64 v11, 12, s13 +; GFX8-NEXT: s_bfe_i32 s4, s4, 0x40018 +; GFX8-NEXT: v_and_b32_e32 v8, s0, v8 +; GFX8-NEXT: v_and_b32_e32 v9, s0, v9 ; GFX8-NEXT: v_ashrrev_i16_e32 v10, 12, v10 ; GFX8-NEXT: v_ashrrev_i16_e32 v11, 12, v11 -; GFX8-NEXT: s_bfe_i32 s0, s0, 0x40018 -; GFX8-NEXT: v_and_b32_e32 v10, s2, v10 -; GFX8-NEXT: v_and_b32_e32 v11, s2, v11 +; GFX8-NEXT: s_bfe_i32 s2, s2, 0x40018 +; GFX8-NEXT: v_and_b32_e32 v10, s0, v10 +; GFX8-NEXT: v_and_b32_e32 v11, s0, v11 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mad_i32_i24 v2, s12, v12, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s6, v12, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, v3, v4, v2 ; GFX8-NEXT: v_add_u32_sdwa v2, vcc, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX8-NEXT: v_mad_u32_u24 v2, v6, v7, v2 -; GFX8-NEXT: v_mad_i32_i24 v2, s16, v13, v2 +; GFX8-NEXT: v_mad_i32_i24 v2, s14, v13, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, v8, v9, v2 -; GFX8-NEXT: v_mov_b32_e32 v3, s1 -; GFX8-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mad_i32_i24 v2, s2, v3, v2 ; GFX8-NEXT: v_mad_u32_u24 v2, v10, v11, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm @@ 
-3181,67 +3181,67 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NEXT: s_movk_i32 s2, 0xff ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshr_b32 s7, s0, 4 -; GFX9-NEXT: s_lshr_b32 s11, s1, 4 -; GFX9-NEXT: v_lshlrev_b16_e64 v3, 12, s7 -; GFX9-NEXT: v_lshlrev_b16_e64 v4, 12, s11 -; GFX9-NEXT: s_bfe_i32 s13, s1, 0x40000 +; GFX9-NEXT: s_lshr_b32 s0, s2, 4 +; GFX9-NEXT: s_lshr_b32 s1, s4, 4 +; GFX9-NEXT: v_lshlrev_b16_e64 v3, 12, s0 +; GFX9-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX9-NEXT: s_bfe_i32 s5, s4, 0x40000 +; GFX9-NEXT: s_lshr_b32 s0, s2, 12 +; GFX9-NEXT: v_lshlrev_b16_e64 v6, 12, s0 +; GFX9-NEXT: s_lshr_b32 s1, s4, 12 +; GFX9-NEXT: s_bfe_i32 s7, s4, 0x40008 +; GFX9-NEXT: s_bfe_i32 s6, s2, 0x40000 +; GFX9-NEXT: v_mov_b32_e32 v12, s5 ; GFX9-NEXT: v_ashrrev_i16_e32 v3, 12, v3 +; GFX9-NEXT: s_movk_i32 s0, 0xff ; GFX9-NEXT: v_ashrrev_i16_e32 v4, 12, v4 -; GFX9-NEXT: s_lshr_b32 s6, s0, 12 -; GFX9-NEXT: s_lshr_b32 s10, s1, 12 -; GFX9-NEXT: s_bfe_i32 s15, s1, 0x40008 -; GFX9-NEXT: s_bfe_i32 s12, s0, 0x40000 -; GFX9-NEXT: v_mov_b32_e32 v12, s13 -; GFX9-NEXT: v_lshlrev_b16_e64 v6, 12, s6 -; GFX9-NEXT: v_lshlrev_b16_e64 v7, 12, s10 -; GFX9-NEXT: s_bfe_i32 s14, s0, 0x40008 -; GFX9-NEXT: v_mov_b32_e32 v5, s15 -; GFX9-NEXT: v_and_b32_e32 v3, s2, v3 -; GFX9-NEXT: v_and_b32_e32 v4, s2, v4 +; GFX9-NEXT: v_lshlrev_b16_e64 v7, 12, s1 +; GFX9-NEXT: s_bfe_i32 s8, s2, 0x40008 +; GFX9-NEXT: v_mov_b32_e32 v5, s7 +; GFX9-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX9-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX9-NEXT: v_ashrrev_i16_e32 v6, 12, v6 ; GFX9-NEXT: v_ashrrev_i16_e32 v7, 12, v7 -; GFX9-NEXT: s_lshr_b32 s5, s0, 20 -; GFX9-NEXT: s_lshr_b32 s9, s1, 20 -; GFX9-NEXT: v_mul_i32_i24_e32 v5, s14, v5 -; GFX9-NEXT: v_lshlrev_b16_e64 v8, 12, s5 -; GFX9-NEXT: v_lshlrev_b16_e64 v9, 12, s9 -; GFX9-NEXT: s_bfe_i32 s17, s1, 0x40010 -; GFX9-NEXT: v_and_b32_e32 v6, s2, v6 -; GFX9-NEXT: v_and_b32_e32 v7, s2, v7 -; GFX9-NEXT: s_lshr_b32 s8, s1, 28 +; GFX9-NEXT: s_lshr_b32 s9, s2, 20 +; GFX9-NEXT: s_lshr_b32 s10, s4, 20 +; GFX9-NEXT: v_mul_i32_i24_e32 v5, s8, v5 +; GFX9-NEXT: v_lshlrev_b16_e64 v8, 12, s9 +; GFX9-NEXT: v_lshlrev_b16_e64 v9, 12, s10 +; GFX9-NEXT: s_bfe_i32 s11, s4, 0x40010 +; GFX9-NEXT: v_and_b32_e32 v6, s0, v6 +; GFX9-NEXT: v_and_b32_e32 v7, s0, v7 +; GFX9-NEXT: s_lshr_b32 s13, s4, 28 ; GFX9-NEXT: v_ashrrev_i16_e32 v8, 12, v8 ; GFX9-NEXT: v_ashrrev_i16_e32 v9, 12, v9 -; GFX9-NEXT: s_lshr_b32 s4, s0, 28 -; GFX9-NEXT: s_bfe_i32 s16, s0, 0x40010 -; GFX9-NEXT: v_mov_b32_e32 v13, s17 -; GFX9-NEXT: v_lshlrev_b16_e64 v10, 12, s4 -; GFX9-NEXT: v_lshlrev_b16_e64 v11, 12, s8 -; GFX9-NEXT: s_bfe_i32 s1, s1, 0x40018 -; GFX9-NEXT: v_and_b32_e32 v8, s2, v8 -; GFX9-NEXT: v_and_b32_e32 v9, s2, v9 +; GFX9-NEXT: s_lshr_b32 s12, s2, 28 +; GFX9-NEXT: s_bfe_i32 s14, s2, 0x40010 +; GFX9-NEXT: v_mov_b32_e32 v13, s11 +; GFX9-NEXT: v_lshlrev_b16_e64 v10, 12, s12 +; GFX9-NEXT: v_lshlrev_b16_e64 v11, 12, s13 +; GFX9-NEXT: s_bfe_i32 s4, s4, 0x40018 +; GFX9-NEXT: v_and_b32_e32 v8, s0, v8 +; GFX9-NEXT: v_and_b32_e32 v9, s0, v9 ; GFX9-NEXT: v_ashrrev_i16_e32 v10, 12, v10 ; GFX9-NEXT: v_ashrrev_i16_e32 v11, 12, v11 -; GFX9-NEXT: s_bfe_i32 s0, s0, 
0x40018 -; GFX9-NEXT: v_and_b32_e32 v10, s2, v10 -; GFX9-NEXT: v_and_b32_e32 v11, s2, v11 +; GFX9-NEXT: s_bfe_i32 s2, s2, 0x40018 +; GFX9-NEXT: v_and_b32_e32 v10, s0, v10 +; GFX9-NEXT: v_and_b32_e32 v11, s0, v11 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mad_i32_i24 v2, s12, v12, v2 +; GFX9-NEXT: v_mad_i32_i24 v2, s6, v12, v2 ; GFX9-NEXT: v_mad_u32_u24 v2, v3, v4, v2 ; GFX9-NEXT: v_add_u32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-NEXT: v_mad_u32_u24 v2, v6, v7, v2 -; GFX9-NEXT: v_mad_i32_i24 v2, s16, v13, v2 +; GFX9-NEXT: v_mad_i32_i24 v2, s14, v13, v2 ; GFX9-NEXT: v_mad_u32_u24 v2, v8, v9, v2 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NEXT: v_mad_i32_i24 v2, s2, v3, v2 ; GFX9-NEXT: v_mad_u32_u24 v2, v10, v11, v2 ; GFX9-NEXT: global_store_byte v[0:1], v2, off ; GFX9-NEXT: s_endpgm @@ -3250,67 +3250,67 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_movk_i32 s2, 0xff ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-DL-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s4, s[6:7], 0x0 ; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: global_load_ubyte v2, v[0:1], off -; GFX9-DL-NEXT: s_load_dword s0, s[4:5], 0x0 -; GFX9-DL-NEXT: s_load_dword s1, s[6:7], 0x0 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_lshr_b32 s7, s0, 4 -; GFX9-DL-NEXT: s_lshr_b32 s11, s1, 4 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v3, 12, s7 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v4, 12, s11 -; GFX9-DL-NEXT: s_bfe_i32 s13, s1, 0x40000 +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 4 +; GFX9-DL-NEXT: s_lshr_b32 s1, s4, 4 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v3, 12, s0 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v4, 12, s1 +; GFX9-DL-NEXT: s_bfe_i32 s5, s4, 0x40000 +; GFX9-DL-NEXT: s_lshr_b32 s0, s2, 12 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v6, 12, s0 +; GFX9-DL-NEXT: s_lshr_b32 s1, s4, 12 +; GFX9-DL-NEXT: s_bfe_i32 s7, s4, 0x40008 +; GFX9-DL-NEXT: s_bfe_i32 s6, s2, 0x40000 +; GFX9-DL-NEXT: v_mov_b32_e32 v12, s5 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v3, 12, v3 +; GFX9-DL-NEXT: s_movk_i32 s0, 0xff ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v4, 12, v4 -; GFX9-DL-NEXT: s_lshr_b32 s6, s0, 12 -; GFX9-DL-NEXT: s_lshr_b32 s10, s1, 12 -; GFX9-DL-NEXT: s_bfe_i32 s15, s1, 0x40008 -; GFX9-DL-NEXT: s_bfe_i32 s12, s0, 0x40000 -; GFX9-DL-NEXT: v_mov_b32_e32 v12, s13 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v6, 12, s6 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v7, 12, s10 -; GFX9-DL-NEXT: s_bfe_i32 s14, s0, 0x40008 -; GFX9-DL-NEXT: v_mov_b32_e32 v5, s15 -; GFX9-DL-NEXT: v_and_b32_e32 v3, s2, v3 -; GFX9-DL-NEXT: v_and_b32_e32 v4, s2, v4 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v7, 12, s1 +; GFX9-DL-NEXT: s_bfe_i32 s8, s2, 0x40008 +; GFX9-DL-NEXT: v_mov_b32_e32 v5, s7 +; GFX9-DL-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX9-DL-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v6, 12, v6 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v7, 12, v7 -; GFX9-DL-NEXT: s_lshr_b32 s5, s0, 20 -; GFX9-DL-NEXT: s_lshr_b32 s9, s1, 20 -; GFX9-DL-NEXT: v_mul_i32_i24_e32 v5, s14, v5 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v8, 12, s5 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v9, 12, s9 -; GFX9-DL-NEXT: s_bfe_i32 s17, s1, 0x40010 -; GFX9-DL-NEXT: v_and_b32_e32 v6, s2, v6 -; GFX9-DL-NEXT: v_and_b32_e32 v7, s2, v7 -; GFX9-DL-NEXT: s_lshr_b32 s8, s1, 28 +; GFX9-DL-NEXT: s_lshr_b32 s9, s2, 20 +; GFX9-DL-NEXT: s_lshr_b32 s10, s4, 20 +; GFX9-DL-NEXT: 
v_mul_i32_i24_e32 v5, s8, v5 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v8, 12, s9 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v9, 12, s10 +; GFX9-DL-NEXT: s_bfe_i32 s11, s4, 0x40010 +; GFX9-DL-NEXT: v_and_b32_e32 v6, s0, v6 +; GFX9-DL-NEXT: v_and_b32_e32 v7, s0, v7 +; GFX9-DL-NEXT: s_lshr_b32 s13, s4, 28 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v8, 12, v8 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v9, 12, v9 -; GFX9-DL-NEXT: s_lshr_b32 s4, s0, 28 -; GFX9-DL-NEXT: s_bfe_i32 s16, s0, 0x40010 -; GFX9-DL-NEXT: v_mov_b32_e32 v13, s17 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v10, 12, s4 -; GFX9-DL-NEXT: v_lshlrev_b16_e64 v11, 12, s8 -; GFX9-DL-NEXT: s_bfe_i32 s1, s1, 0x40018 -; GFX9-DL-NEXT: v_and_b32_e32 v8, s2, v8 -; GFX9-DL-NEXT: v_and_b32_e32 v9, s2, v9 +; GFX9-DL-NEXT: s_lshr_b32 s12, s2, 28 +; GFX9-DL-NEXT: s_bfe_i32 s14, s2, 0x40010 +; GFX9-DL-NEXT: v_mov_b32_e32 v13, s11 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v10, 12, s12 +; GFX9-DL-NEXT: v_lshlrev_b16_e64 v11, 12, s13 +; GFX9-DL-NEXT: s_bfe_i32 s4, s4, 0x40018 +; GFX9-DL-NEXT: v_and_b32_e32 v8, s0, v8 +; GFX9-DL-NEXT: v_and_b32_e32 v9, s0, v9 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v10, 12, v10 ; GFX9-DL-NEXT: v_ashrrev_i16_e32 v11, 12, v11 -; GFX9-DL-NEXT: s_bfe_i32 s0, s0, 0x40018 -; GFX9-DL-NEXT: v_and_b32_e32 v10, s2, v10 -; GFX9-DL-NEXT: v_and_b32_e32 v11, s2, v11 +; GFX9-DL-NEXT: s_bfe_i32 s2, s2, 0x40018 +; GFX9-DL-NEXT: v_and_b32_e32 v10, s0, v10 +; GFX9-DL-NEXT: v_and_b32_e32 v11, s0, v11 ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s12, v12, v2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s6, v12, v2 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, v3, v4, v2 ; GFX9-DL-NEXT: v_add_u32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, v6, v7, v2 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s16, v13, v2 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s14, v13, v2 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, v8, v9, v2 -; GFX9-DL-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s2, v3, v2 ; GFX9-DL-NEXT: v_mad_u32_u24 v2, v10, v11, v2 ; GFX9-DL-NEXT: global_store_byte v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm @@ -3387,51 +3387,51 @@ define amdgpu_kernel void @idot8_multiuses_mul1(<8 x i4> addrspace(1)* %src1, ; GFX7-LABEL: idot8_multiuses_mul1: ; GFX7: ; %bb.0: ; %entry -; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 -; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 -; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 -; GFX7-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX7-NEXT: s_load_dword s1, s[6:7], 0x0 +; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_i32 s8, s0, 0x40000 -; GFX7-NEXT: s_bfe_i32 s9, s1, 0x40000 -; GFX7-NEXT: v_mov_b32_e32 v0, s9 +; GFX7-NEXT: s_bfe_i32 s4, s0, 0x40000 +; GFX7-NEXT: s_bfe_i32 s5, s1, 0x40000 +; GFX7-NEXT: v_mov_b32_e32 v0, s5 ; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: v_mad_i32_i24 v1, s8, v0, v1 -; GFX7-NEXT: s_bfe_i32 s11, s1, 0x40004 -; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40004 -; GFX7-NEXT: s_bfe_i32 s13, s1, 0x40008 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v0, v1 +; GFX7-NEXT: v_mad_i32_i24 v1, s4, v0, v1 +; GFX7-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX7-NEXT: s_bfe_i32 s6, s0, 0x40004 +; GFX7-NEXT: 
s_bfe_i32 s11, s1, 0x40008 +; GFX7-NEXT: v_mad_i32_i24 v0, s4, v0, v1 +; GFX7-NEXT: v_mov_b32_e32 v2, s7 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v2, s11 +; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s13, s1, 0x4000c ; GFX7-NEXT: v_mad_i32_i24 v0, s10, v2, v0 -; GFX7-NEXT: s_bfe_i32 s12, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s12, s0, 0x4000c ; GFX7-NEXT: v_mov_b32_e32 v2, s13 -; GFX7-NEXT: s_bfe_i32 s15, s1, 0x4000c +; GFX7-NEXT: s_bfe_i32 s15, s1, 0x40010 ; GFX7-NEXT: v_mad_i32_i24 v0, s12, v2, v0 -; GFX7-NEXT: s_bfe_i32 s14, s0, 0x4000c +; GFX7-NEXT: s_bfe_i32 s14, s0, 0x40010 ; GFX7-NEXT: v_mov_b32_e32 v2, s15 -; GFX7-NEXT: s_bfe_i32 s17, s1, 0x40010 +; GFX7-NEXT: s_bfe_i32 s17, s1, 0x40014 +; GFX7-NEXT: s_bfe_i32 s19, s1, 0x40018 ; GFX7-NEXT: v_mad_i32_i24 v0, s14, v2, v0 -; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40010 +; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40014 ; GFX7-NEXT: v_mov_b32_e32 v2, s17 -; GFX7-NEXT: s_bfe_i32 s19, s1, 0x40014 -; GFX7-NEXT: s_bfe_i32 s21, s1, 0x40018 +; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40018 ; GFX7-NEXT: v_mad_i32_i24 v0, s16, v2, v0 -; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40014 ; GFX7-NEXT: v_mov_b32_e32 v2, s19 -; GFX7-NEXT: s_bfe_i32 s20, s0, 0x40018 -; GFX7-NEXT: v_mad_i32_i24 v0, s18, v2, v0 -; GFX7-NEXT: v_mov_b32_e32 v2, s21 ; GFX7-NEXT: s_ashr_i32 s1, s1, 28 -; GFX7-NEXT: v_mad_i32_i24 v0, s20, v2, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s18, v2, v0 ; GFX7-NEXT: s_ashr_i32 s0, s0, 28 ; GFX7-NEXT: v_mov_b32_e32 v2, s1 ; GFX7-NEXT: v_mad_i32_i24 v0, s0, v2, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX7-NEXT: s_mov_b32 s11, 0xf000 +; GFX7-NEXT: s_mov_b32 s10, -1 +; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: idot8_multiuses_mul1: @@ -3656,49 +3656,46 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd -; GFX7-NEXT: s_mov_b32 s0, 0 -; GFX7-NEXT: s_mov_b32 s12, s0 -; GFX7-NEXT: s_mov_b32 s14, s0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_load_dword s1, s[8:9], 0x0 ; GFX7-NEXT: s_load_dword s2, s[10:11], 0x0 -; GFX7-NEXT: s_load_dword s38, s[4:5], 0x0 -; GFX7-NEXT: s_mov_b32 s16, s0 -; GFX7-NEXT: s_mov_b32 s18, s0 +; GFX7-NEXT: s_load_dword s36, s[4:5], 0x0 +; GFX7-NEXT: s_mov_b32 s0, 0 +; GFX7-NEXT: s_mov_b32 s6, s0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_lshl_b32 s13, s1, 4 -; GFX7-NEXT: s_lshl_b32 s15, s1, 12 -; GFX7-NEXT: s_lshl_b32 s17, s1, 16 -; GFX7-NEXT: s_lshl_b32 s19, s1, 20 -; GFX7-NEXT: s_ashr_i64 s[10:11], s[12:13], 60 -; GFX7-NEXT: s_lshl_b32 s13, s1, 8 -; GFX7-NEXT: s_ashr_i64 s[8:9], s[0:1], 60 -; GFX7-NEXT: s_lshl_b32 s21, s1, 24 +; GFX7-NEXT: s_lshl_b32 s7, s1, 4 +; GFX7-NEXT: s_ashr_i64 s[10:11], s[0:1], 60 +; GFX7-NEXT: s_lshl_b32 s9, s1, 8 +; GFX7-NEXT: s_lshl_b32 s13, s1, 12 +; GFX7-NEXT: s_lshl_b32 s15, s1, 16 +; GFX7-NEXT: s_lshl_b32 s17, s1, 20 +; GFX7-NEXT: s_lshl_b32 s19, s1, 24 ; GFX7-NEXT: s_lshl_b32 s1, s1, 28 -; GFX7-NEXT: s_ashr_i64 s[22:23], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[20:21], s[0:1], 60 ; GFX7-NEXT: s_mov_b32 s1, s2 -; GFX7-NEXT: s_ashr_i64 s[24:25], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[22:23], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 4 -; GFX7-NEXT: s_ashr_i64 s[26:27], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[24:25], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 8 -; GFX7-NEXT: s_ashr_i64 s[28:29], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[26:27], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 12 -; 
GFX7-NEXT: s_ashr_i64 s[30:31], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[28:29], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 16 -; GFX7-NEXT: s_ashr_i64 s[32:33], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[30:31], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 20 -; GFX7-NEXT: s_ashr_i64 s[34:35], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[32:33], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 24 -; GFX7-NEXT: s_ashr_i64 s[36:37], s[0:1], 60 +; GFX7-NEXT: s_ashr_i64 s[34:35], s[0:1], 60 ; GFX7-NEXT: s_lshl_b32 s1, s2, 28 -; GFX7-NEXT: s_mov_b32 s20, s0 +; GFX7-NEXT: s_mov_b32 s8, s0 +; GFX7-NEXT: s_mov_b32 s12, s0 +; GFX7-NEXT: s_mov_b32 s14, s0 +; GFX7-NEXT: s_mov_b32 s16, s0 +; GFX7-NEXT: s_mov_b32 s18, s0 ; GFX7-NEXT: s_ashr_i64 s[0:1], s[0:1], 60 ; GFX7-NEXT: v_mov_b32_e32 v0, s0 -; GFX7-NEXT: v_mov_b32_e32 v1, s38 -; GFX7-NEXT: v_mad_i32_i24 v0, s22, v0, v1 -; GFX7-NEXT: s_ashr_i64 s[20:21], s[20:21], 60 ; GFX7-NEXT: v_mov_b32_e32 v1, s36 -; GFX7-NEXT: v_mad_i32_i24 v0, s20, v1, v0 +; GFX7-NEXT: v_mad_i32_i24 v0, s20, v0, v1 ; GFX7-NEXT: s_ashr_i64 s[18:19], s[18:19], 60 ; GFX7-NEXT: v_mov_b32_e32 v1, s34 ; GFX7-NEXT: v_mad_i32_i24 v0, s18, v1, v0 @@ -3711,12 +3708,16 @@ ; GFX7-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 ; GFX7-NEXT: v_mov_b32_e32 v1, s28 ; GFX7-NEXT: v_mad_i32_i24 v0, s12, v1, v0 +; GFX7-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 ; GFX7-NEXT: v_mov_b32_e32 v1, s26 -; GFX7-NEXT: v_mad_i32_i24 v0, s10, v1, v0 +; GFX7-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 +; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, s24 +; GFX7-NEXT: v_mad_i32_i24 v0, s6, v1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, s22 +; GFX7-NEXT: v_mad_i32_i24 v0, s10, v1, v0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s6, -1 -; GFX7-NEXT: v_mad_i32_i24 v0, s8, v1, v0 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -3724,67 +3725,68 @@ ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX8-NEXT: s_mov_b32 s8, 0 -; GFX8-NEXT: s_mov_b32 s10, s8 -; GFX8-NEXT: s_mov_b32 s12, s8 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_load_dword s9, s[4:5], 0x0 +; GFX8-NEXT: s_load_dword s5, s[4:5], 0x0 ; GFX8-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX8-NEXT: s_load_dword s36, s[0:1], 0x0 -; GFX8-NEXT: s_mov_b32 s14, s8 -; GFX8-NEXT: s_mov_b32 s16, s8 +; GFX8-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_lshl_b32 s11, s9, 4 -; GFX8-NEXT: s_lshl_b32 s13, s9, 8 -; GFX8-NEXT: s_lshl_b32 s15, s9, 16 -; GFX8-NEXT: s_lshl_b32 s17, s9, 20 -; GFX8-NEXT: s_ashr_i64 s[6:7], s[10:11], 60 -; GFX8-NEXT: s_ashr_i64 s[10:11], s[12:13], 60 -; GFX8-NEXT: s_lshl_b32 s13, s9, 12 -; GFX8-NEXT: s_ashr_i64 s[4:5], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s19, s9, 24 -; GFX8-NEXT: s_lshl_b32 s9, s9, 28 -; GFX8-NEXT: s_ashr_i64 s[20:21], s[8:9], 60 -; GFX8-NEXT: s_mov_b32 s9, s2 -; GFX8-NEXT: s_ashr_i64 s[22:23], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 4 -; GFX8-NEXT: s_ashr_i64 s[24:25], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 8 -; GFX8-NEXT: s_ashr_i64 s[26:27], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 12 -; GFX8-NEXT: s_ashr_i64 s[28:29], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 16 -; GFX8-NEXT: s_ashr_i64 s[30:31], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 20 -; GFX8-NEXT: s_ashr_i64 s[32:33], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 24 -; GFX8-NEXT: s_ashr_i64 s[34:35], s[8:9], 60 -; GFX8-NEXT: s_lshl_b32 s9, s2, 28 -; 
GFX8-NEXT: s_mov_b32 s18, s8 -; GFX8-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX8-NEXT: v_mov_b32_e32 v0, s8 -; GFX8-NEXT: v_mov_b32_e32 v1, s36 -; GFX8-NEXT: v_mad_i32_i24 v0, s20, v0, v1 -; GFX8-NEXT: s_ashr_i64 s[18:19], s[18:19], 60 -; GFX8-NEXT: v_mov_b32_e32 v1, s34 -; GFX8-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX8-NEXT: s_ashr_i64 s[16:17], s[16:17], 60 -; GFX8-NEXT: v_mov_b32_e32 v1, s32 -; GFX8-NEXT: v_mad_i32_i24 v0, s16, v1, v0 +; GFX8-NEXT: s_lshl_b32 s1, s5, 4 +; GFX8-NEXT: s_lshl_b32 s7, s5, 8 +; GFX8-NEXT: v_mov_b32_e32 v2, s4 +; GFX8-NEXT: s_mov_b32 s4, 0 +; GFX8-NEXT: s_lshl_b32 s9, s5, 12 +; GFX8-NEXT: s_lshl_b32 s11, s5, 16 +; GFX8-NEXT: s_lshl_b32 s13, s5, 20 +; GFX8-NEXT: s_lshl_b32 s15, s5, 24 +; GFX8-NEXT: s_ashr_i64 s[16:17], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s5, 28 +; GFX8-NEXT: s_ashr_i64 s[18:19], s[4:5], 60 +; GFX8-NEXT: s_mov_b32 s5, s2 +; GFX8-NEXT: s_ashr_i64 s[20:21], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 4 +; GFX8-NEXT: s_ashr_i64 s[22:23], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 8 +; GFX8-NEXT: s_ashr_i64 s[24:25], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 12 +; GFX8-NEXT: s_ashr_i64 s[26:27], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 16 +; GFX8-NEXT: s_ashr_i64 s[28:29], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 20 +; GFX8-NEXT: s_ashr_i64 s[30:31], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 24 +; GFX8-NEXT: s_ashr_i64 s[32:33], s[4:5], 60 +; GFX8-NEXT: s_lshl_b32 s5, s2, 28 +; GFX8-NEXT: s_mov_b32 s0, s4 +; GFX8-NEXT: s_mov_b32 s6, s4 +; GFX8-NEXT: s_mov_b32 s8, s4 +; GFX8-NEXT: s_mov_b32 s10, s4 +; GFX8-NEXT: s_mov_b32 s12, s4 +; GFX8-NEXT: s_mov_b32 s14, s4 +; GFX8-NEXT: s_ashr_i64 s[4:5], s[4:5], 60 +; GFX8-NEXT: v_mov_b32_e32 v3, s4 +; GFX8-NEXT: v_mad_i32_i24 v2, s18, v3, v2 ; GFX8-NEXT: s_ashr_i64 s[14:15], s[14:15], 60 -; GFX8-NEXT: v_mov_b32_e32 v1, s30 -; GFX8-NEXT: v_mad_i32_i24 v0, s14, v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v3, s32 +; GFX8-NEXT: v_mad_i32_i24 v2, s14, v3, v2 ; GFX8-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX8-NEXT: v_mov_b32_e32 v1, s28 -; GFX8-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s26 -; GFX8-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s24 -; GFX8-NEXT: v_mad_i32_i24 v0, s6, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s22 -; GFX8-NEXT: v_mad_i32_i24 v2, s4, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mov_b32_e32 v3, s30 +; GFX8-NEXT: v_mad_i32_i24 v2, s12, v3, v2 +; GFX8-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX8-NEXT: v_mov_b32_e32 v3, s28 +; GFX8-NEXT: v_mad_i32_i24 v2, s10, v3, v2 +; GFX8-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 +; GFX8-NEXT: v_mov_b32_e32 v3, s26 +; GFX8-NEXT: v_mad_i32_i24 v2, s8, v3, v2 +; GFX8-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 +; GFX8-NEXT: v_mov_b32_e32 v3, s24 +; GFX8-NEXT: v_mad_i32_i24 v2, s6, v3, v2 +; GFX8-NEXT: s_ashr_i64 s[0:1], s[0:1], 60 +; GFX8-NEXT: v_mov_b32_e32 v3, s22 +; GFX8-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s20 +; GFX8-NEXT: v_mad_i32_i24 v2, s16, v3, v2 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_endpgm ; @@ -3792,67 +3794,68 @@ ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s8, 0 -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s9, s[4:5], 0x0 +; GFX9-NEXT: s_load_dword s5, s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-NEXT: 
s_load_dword s36, s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s16, s8 +; GFX9-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_lshl_b32 s11, s9, 4 -; GFX9-NEXT: s_lshl_b32 s13, s9, 8 -; GFX9-NEXT: s_lshl_b32 s15, s9, 16 -; GFX9-NEXT: s_lshl_b32 s17, s9, 20 -; GFX9-NEXT: s_ashr_i64 s[6:7], s[10:11], 60 -; GFX9-NEXT: s_ashr_i64 s[10:11], s[12:13], 60 -; GFX9-NEXT: s_lshl_b32 s13, s9, 12 -; GFX9-NEXT: s_ashr_i64 s[4:5], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s19, s9, 24 -; GFX9-NEXT: s_lshl_b32 s9, s9, 28 -; GFX9-NEXT: s_ashr_i64 s[20:21], s[8:9], 60 -; GFX9-NEXT: s_mov_b32 s9, s2 -; GFX9-NEXT: s_ashr_i64 s[22:23], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 4 -; GFX9-NEXT: s_ashr_i64 s[24:25], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 8 -; GFX9-NEXT: s_ashr_i64 s[26:27], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 12 -; GFX9-NEXT: s_ashr_i64 s[28:29], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 16 -; GFX9-NEXT: s_ashr_i64 s[30:31], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 20 -; GFX9-NEXT: s_ashr_i64 s[32:33], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 24 -; GFX9-NEXT: s_ashr_i64 s[34:35], s[8:9], 60 -; GFX9-NEXT: s_lshl_b32 s9, s2, 28 -; GFX9-NEXT: s_mov_b32 s18, s8 -; GFX9-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX9-NEXT: v_mov_b32_e32 v0, s8 -; GFX9-NEXT: v_mov_b32_e32 v1, s36 -; GFX9-NEXT: v_mad_i32_i24 v0, s20, v0, v1 -; GFX9-NEXT: s_ashr_i64 s[18:19], s[18:19], 60 -; GFX9-NEXT: v_mov_b32_e32 v1, s34 -; GFX9-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX9-NEXT: s_ashr_i64 s[16:17], s[16:17], 60 -; GFX9-NEXT: v_mov_b32_e32 v1, s32 -; GFX9-NEXT: v_mad_i32_i24 v0, s16, v1, v0 +; GFX9-NEXT: s_lshl_b32 s1, s5, 4 +; GFX9-NEXT: s_lshl_b32 s7, s5, 8 +; GFX9-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-NEXT: s_mov_b32 s4, 0 +; GFX9-NEXT: s_lshl_b32 s9, s5, 12 +; GFX9-NEXT: s_lshl_b32 s11, s5, 16 +; GFX9-NEXT: s_lshl_b32 s13, s5, 20 +; GFX9-NEXT: s_lshl_b32 s15, s5, 24 +; GFX9-NEXT: s_ashr_i64 s[16:17], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s5, 28 +; GFX9-NEXT: s_ashr_i64 s[18:19], s[4:5], 60 +; GFX9-NEXT: s_mov_b32 s5, s2 +; GFX9-NEXT: s_ashr_i64 s[20:21], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 4 +; GFX9-NEXT: s_ashr_i64 s[22:23], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 8 +; GFX9-NEXT: s_ashr_i64 s[24:25], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 12 +; GFX9-NEXT: s_ashr_i64 s[26:27], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 16 +; GFX9-NEXT: s_ashr_i64 s[28:29], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 20 +; GFX9-NEXT: s_ashr_i64 s[30:31], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 24 +; GFX9-NEXT: s_ashr_i64 s[32:33], s[4:5], 60 +; GFX9-NEXT: s_lshl_b32 s5, s2, 28 +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s6, s4 +; GFX9-NEXT: s_mov_b32 s8, s4 +; GFX9-NEXT: s_mov_b32 s10, s4 +; GFX9-NEXT: s_mov_b32 s12, s4 +; GFX9-NEXT: s_mov_b32 s14, s4 +; GFX9-NEXT: s_ashr_i64 s[4:5], s[4:5], 60 +; GFX9-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-NEXT: v_mad_i32_i24 v2, s18, v3, v2 ; GFX9-NEXT: s_ashr_i64 s[14:15], s[14:15], 60 -; GFX9-NEXT: v_mov_b32_e32 v1, s30 -; GFX9-NEXT: v_mad_i32_i24 v0, s14, v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v3, s32 +; GFX9-NEXT: v_mad_i32_i24 v2, s14, v3, v2 ; GFX9-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX9-NEXT: v_mov_b32_e32 v1, s28 -; GFX9-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s26 -; GFX9-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, s24 -; GFX9-NEXT: v_mad_i32_i24 v0, s6, v1, v0 -; 
GFX9-NEXT: v_mov_b32_e32 v1, s22 -; GFX9-NEXT: v_mad_i32_i24 v2, s4, v1, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v3, s30 +; GFX9-NEXT: v_mad_i32_i24 v2, s12, v3, v2 +; GFX9-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX9-NEXT: v_mov_b32_e32 v3, s28 +; GFX9-NEXT: v_mad_i32_i24 v2, s10, v3, v2 +; GFX9-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 +; GFX9-NEXT: v_mov_b32_e32 v3, s26 +; GFX9-NEXT: v_mad_i32_i24 v2, s8, v3, v2 +; GFX9-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 +; GFX9-NEXT: v_mov_b32_e32 v3, s24 +; GFX9-NEXT: v_mad_i32_i24 v2, s6, v3, v2 +; GFX9-NEXT: s_ashr_i64 s[0:1], s[0:1], 60 +; GFX9-NEXT: v_mov_b32_e32 v3, s22 +; GFX9-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX9-NEXT: v_mov_b32_e32 v3, s20 +; GFX9-NEXT: v_mad_i32_i24 v2, s16, v3, v2 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm ; @@ -3860,67 +3863,68 @@ ; GFX9-DL: ; %bb.0: ; %entry ; GFX9-DL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-DL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 -; GFX9-DL-NEXT: s_mov_b32 s8, 0 -; GFX9-DL-NEXT: s_mov_b32 s10, s8 -; GFX9-DL-NEXT: s_mov_b32 s12, s8 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_load_dword s9, s[4:5], 0x0 +; GFX9-DL-NEXT: s_load_dword s5, s[4:5], 0x0 ; GFX9-DL-NEXT: s_load_dword s2, s[6:7], 0x0 -; GFX9-DL-NEXT: s_load_dword s36, s[0:1], 0x0 -; GFX9-DL-NEXT: s_mov_b32 s14, s8 -; GFX9-DL-NEXT: s_mov_b32 s16, s8 +; GFX9-DL-NEXT: s_load_dword s4, s[0:1], 0x0 +; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-DL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DL-NEXT: s_lshl_b32 s11, s9, 4 -; GFX9-DL-NEXT: s_lshl_b32 s13, s9, 8 -; GFX9-DL-NEXT: s_lshl_b32 s15, s9, 16 -; GFX9-DL-NEXT: s_lshl_b32 s17, s9, 20 -; GFX9-DL-NEXT: s_ashr_i64 s[6:7], s[10:11], 60 -; GFX9-DL-NEXT: s_ashr_i64 s[10:11], s[12:13], 60 -; GFX9-DL-NEXT: s_lshl_b32 s13, s9, 12 -; GFX9-DL-NEXT: s_ashr_i64 s[4:5], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s19, s9, 24 -; GFX9-DL-NEXT: s_lshl_b32 s9, s9, 28 -; GFX9-DL-NEXT: s_ashr_i64 s[20:21], s[8:9], 60 -; GFX9-DL-NEXT: s_mov_b32 s9, s2 -; GFX9-DL-NEXT: s_ashr_i64 s[22:23], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 4 -; GFX9-DL-NEXT: s_ashr_i64 s[24:25], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 8 -; GFX9-DL-NEXT: s_ashr_i64 s[26:27], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 12 -; GFX9-DL-NEXT: s_ashr_i64 s[28:29], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 16 -; GFX9-DL-NEXT: s_ashr_i64 s[30:31], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 20 -; GFX9-DL-NEXT: s_ashr_i64 s[32:33], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 24 -; GFX9-DL-NEXT: s_ashr_i64 s[34:35], s[8:9], 60 -; GFX9-DL-NEXT: s_lshl_b32 s9, s2, 28 -; GFX9-DL-NEXT: s_mov_b32 s18, s8 -; GFX9-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s8 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s36 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s20, v0, v1 -; GFX9-DL-NEXT: s_ashr_i64 s[18:19], s[18:19], 60 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s34 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s18, v1, v0 -; GFX9-DL-NEXT: s_ashr_i64 s[16:17], s[16:17], 60 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s32 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s16, v1, v0 +; GFX9-DL-NEXT: s_lshl_b32 s1, s5, 4 +; GFX9-DL-NEXT: s_lshl_b32 s7, s5, 8 +; GFX9-DL-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-DL-NEXT: s_mov_b32 s4, 0 +; GFX9-DL-NEXT: s_lshl_b32 s9, s5, 12 +; GFX9-DL-NEXT: s_lshl_b32 s11, s5, 16 +; GFX9-DL-NEXT: s_lshl_b32 s13, s5, 20 +; GFX9-DL-NEXT: s_lshl_b32 s15, s5, 24 +; GFX9-DL-NEXT: s_ashr_i64 s[16:17], s[4:5], 60 +; 
GFX9-DL-NEXT: s_lshl_b32 s5, s5, 28 +; GFX9-DL-NEXT: s_ashr_i64 s[18:19], s[4:5], 60 +; GFX9-DL-NEXT: s_mov_b32 s5, s2 +; GFX9-DL-NEXT: s_ashr_i64 s[20:21], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 4 +; GFX9-DL-NEXT: s_ashr_i64 s[22:23], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 8 +; GFX9-DL-NEXT: s_ashr_i64 s[24:25], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 12 +; GFX9-DL-NEXT: s_ashr_i64 s[26:27], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 16 +; GFX9-DL-NEXT: s_ashr_i64 s[28:29], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 20 +; GFX9-DL-NEXT: s_ashr_i64 s[30:31], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 24 +; GFX9-DL-NEXT: s_ashr_i64 s[32:33], s[4:5], 60 +; GFX9-DL-NEXT: s_lshl_b32 s5, s2, 28 +; GFX9-DL-NEXT: s_mov_b32 s0, s4 +; GFX9-DL-NEXT: s_mov_b32 s6, s4 +; GFX9-DL-NEXT: s_mov_b32 s8, s4 +; GFX9-DL-NEXT: s_mov_b32 s10, s4 +; GFX9-DL-NEXT: s_mov_b32 s12, s4 +; GFX9-DL-NEXT: s_mov_b32 s14, s4 +; GFX9-DL-NEXT: s_ashr_i64 s[4:5], s[4:5], 60 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s4 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s18, v3, v2 ; GFX9-DL-NEXT: s_ashr_i64 s[14:15], s[14:15], 60 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s30 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s14, v1, v0 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s32 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s14, v3, v2 ; GFX9-DL-NEXT: s_ashr_i64 s[12:13], s[12:13], 60 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s28 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s12, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s26 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s10, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s24 -; GFX9-DL-NEXT: v_mad_i32_i24 v0, s6, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s22 -; GFX9-DL-NEXT: v_mad_i32_i24 v2, s4, v1, v0 -; GFX9-DL-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-DL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s30 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s12, v3, v2 +; GFX9-DL-NEXT: s_ashr_i64 s[10:11], s[10:11], 60 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s28 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s10, v3, v2 +; GFX9-DL-NEXT: s_ashr_i64 s[8:9], s[8:9], 60 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s26 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s8, v3, v2 +; GFX9-DL-NEXT: s_ashr_i64 s[6:7], s[6:7], 60 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s24 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s6, v3, v2 +; GFX9-DL-NEXT: s_ashr_i64 s[0:1], s[0:1], 60 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s22 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s0, v3, v2 +; GFX9-DL-NEXT: v_mov_b32_e32 v3, s20 +; GFX9-DL-NEXT: v_mad_i32_i24 v2, s16, v3, v2 ; GFX9-DL-NEXT: global_store_dword v[0:1], v2, off ; GFX9-DL-NEXT: s_endpgm <8 x i4> addrspace(1)* %src2, @@ -3962,70 +3966,70 @@ ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_bfe_i32 s2, s0, 0x40000 +; GFX7-NEXT: s_bfe_i32 s6, s1, 0x40000 +; GFX7-NEXT: s_bfe_i32 s7, s1, 0x40004 +; GFX7-NEXT: v_mov_b32_e32 v3, s6 +; GFX7-NEXT: v_mov_b32_e32 v2, s7 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s6, -1 +; GFX7-NEXT: buffer_load_ushort v4, off, s[4:7], 0 +; GFX7-NEXT: s_bfe_i32 s8, s1, 0x40008 +; GFX7-NEXT: s_bfe_i32 s9, s1, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v1, s8 +; GFX7-NEXT: s_bfe_i32 s11, s0, 0x40008 +; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40004 +; GFX7-NEXT: v_mov_b32_e32 v0, s9 +; GFX7-NEXT: s_bfe_i32 s12, s0, 0x4000c +; GFX7-NEXT: v_mul_i32_i24_e32 v1, s11, v1 +; GFX7-NEXT: v_mul_i32_i24_e32 v3, s2, v3 +; 
GFX7-NEXT: s_bfe_i32 s2, s1, 0x40010 +; GFX7-NEXT: s_bfe_i32 s8, s1, 0x40014 +; GFX7-NEXT: s_bfe_i32 s11, s1, 0x40018 +; GFX7-NEXT: s_ashr_i32 s1, s1, 28 +; GFX7-NEXT: v_mul_i32_i24_e32 v0, s12, v0 +; GFX7-NEXT: v_mul_i32_i24_e32 v2, s10, v2 +; GFX7-NEXT: s_bfe_i32 s9, s0, 0x40010 +; GFX7-NEXT: s_bfe_i32 s10, s0, 0x40014 +; GFX7-NEXT: s_bfe_i32 s12, s0, 0x40018 +; GFX7-NEXT: s_ashr_i32 s0, s0, 28 +; GFX7-NEXT: v_mov_b32_e32 v5, s1 +; GFX7-NEXT: v_mul_i32_i24_e32 v5, s0, v5 +; GFX7-NEXT: v_mov_b32_e32 v6, s11 ; GFX7-NEXT: s_mov_b32 s0, 0xffff -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s1, s[8:9], 0x0 -; GFX7-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GFX7-NEXT: s_load_dword s2, s[10:11], 0x0 -; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_i32 s8, s1, 0x40010 -; GFX7-NEXT: s_bfe_i32 s9, s1, 0x40014 -; GFX7-NEXT: s_bfe_i32 s15, s2, 0x40010 -; GFX7-NEXT: s_bfe_i32 s16, s2, 0x40014 -; GFX7-NEXT: s_bfe_i32 s17, s2, 0x40018 -; GFX7-NEXT: s_ashr_i32 s18, s2, 28 -; GFX7-NEXT: s_bfe_i32 s19, s2, 0x40000 -; GFX7-NEXT: s_bfe_i32 s20, s2, 0x40004 -; GFX7-NEXT: s_bfe_i32 s21, s2, 0x40008 -; GFX7-NEXT: s_bfe_i32 s2, s2, 0x4000c -; GFX7-NEXT: s_bfe_i32 s10, s1, 0x40018 -; GFX7-NEXT: s_ashr_i32 s11, s1, 28 -; GFX7-NEXT: s_bfe_i32 s12, s1, 0x40000 -; GFX7-NEXT: v_mov_b32_e32 v4, s19 -; GFX7-NEXT: s_bfe_i32 s13, s1, 0x40004 -; GFX7-NEXT: v_mov_b32_e32 v3, s20 -; GFX7-NEXT: s_bfe_i32 s14, s1, 0x40008 -; GFX7-NEXT: v_mov_b32_e32 v2, s21 -; GFX7-NEXT: s_bfe_i32 s1, s1, 0x4000c -; GFX7-NEXT: v_mov_b32_e32 v1, s2 -; GFX7-NEXT: v_mov_b32_e32 v5, s18 -; GFX7-NEXT: v_mov_b32_e32 v6, s17 -; GFX7-NEXT: v_mul_i32_i24_e32 v1, s1, v1 -; GFX7-NEXT: v_mul_i32_i24_e32 v2, s14, v2 -; GFX7-NEXT: v_mul_i32_i24_e32 v3, s13, v3 -; GFX7-NEXT: v_mul_i32_i24_e32 v4, s12, v4 -; GFX7-NEXT: v_mul_i32_i24_e32 v5, s11, v5 -; GFX7-NEXT: v_mul_i32_i24_e32 v6, s10, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 -; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v4, s0, v4 -; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX7-NEXT: v_or_b32_e32 v2, v4, v3 +; GFX7-NEXT: v_mul_i32_i24_e32 v6, s12, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX7-NEXT: v_and_b32_e32 v3, s0, v3 +; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 ; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 -; GFX7-NEXT: v_mov_b32_e32 v7, s16 -; GFX7-NEXT: v_mov_b32_e32 v8, s15 -; GFX7-NEXT: v_or_b32_e32 v3, v6, v5 -; GFX7-NEXT: v_alignbit_b32 v5, v1, v2, 16 -; GFX7-NEXT: v_mul_i32_i24_e32 v7, s9, v7 -; GFX7-NEXT: v_mul_i32_i24_e32 v8, s8, v8 +; GFX7-NEXT: v_or_b32_e32 v1, v3, v2 +; GFX7-NEXT: v_mov_b32_e32 v7, s8 +; GFX7-NEXT: v_mov_b32_e32 v8, s2 +; GFX7-NEXT: v_or_b32_e32 v2, v6, v5 +; GFX7-NEXT: v_alignbit_b32 v5, v0, v1, 16 +; GFX7-NEXT: v_mul_i32_i24_e32 v7, s10, v7 +; GFX7-NEXT: v_mul_i32_i24_e32 v8, s9, v8 ; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX7-NEXT: v_and_b32_e32 v8, s0, v8 -; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GFX7-NEXT: v_or_b32_e32 v4, v8, v7 -; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX7-NEXT: v_or_b32_e32 v3, v8, v7 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX7-NEXT: v_lshrrev_b32_e32 v8, 16, v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v5, v0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: 
v_add_i32_e32 v1, vcc, v4, v1 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v5, v1 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v1, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v6, v0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v4, v0 -; GFX7-NEXT: v_add_i32_e32 v0, vcc, v7, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v2, v0 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v8, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -4274,61 +4278,61 @@ ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b32 s6, -1 -; GFX7-NEXT: s_movk_i32 s0, 0xff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_load_dword s2, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s0, s[8:9], 0x0 +; GFX7-NEXT: s_load_dword s1, s[10:11], 0x0 ; GFX7-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 -; GFX7-NEXT: s_load_dword s8, s[10:11], 0x0 -; GFX7-NEXT: s_mov_b32 s1, 0xffff ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: s_bfe_i32 s9, s2, 0x40000 -; GFX7-NEXT: s_bfe_i32 s10, s2, 0x40004 -; GFX7-NEXT: s_bfe_i32 s16, s8, 0x40000 -; GFX7-NEXT: s_bfe_i32 s17, s8, 0x40004 -; GFX7-NEXT: s_bfe_i32 s18, s8, 0x40008 -; GFX7-NEXT: s_bfe_i32 s19, s8, 0x4000c -; GFX7-NEXT: s_bfe_i32 s20, s8, 0x40010 -; GFX7-NEXT: s_bfe_i32 s21, s8, 0x40014 -; GFX7-NEXT: s_bfe_i32 s22, s8, 0x40018 -; GFX7-NEXT: s_ashr_i32 s8, s8, 28 -; GFX7-NEXT: v_mov_b32_e32 v7, s17 -; GFX7-NEXT: v_mov_b32_e32 v8, s16 -; GFX7-NEXT: s_bfe_i32 s11, s2, 0x40008 -; GFX7-NEXT: v_mov_b32_e32 v6, s18 -; GFX7-NEXT: s_bfe_i32 s12, s2, 0x4000c -; GFX7-NEXT: v_mov_b32_e32 v5, s19 -; GFX7-NEXT: s_bfe_i32 s13, s2, 0x40010 -; GFX7-NEXT: v_mov_b32_e32 v4, s20 -; GFX7-NEXT: s_bfe_i32 s14, s2, 0x40014 -; GFX7-NEXT: v_mov_b32_e32 v3, s21 -; GFX7-NEXT: s_bfe_i32 s15, s2, 0x40018 -; GFX7-NEXT: v_mov_b32_e32 v2, s22 -; GFX7-NEXT: s_ashr_i32 s2, s2, 28 -; GFX7-NEXT: v_mov_b32_e32 v1, s8 -; GFX7-NEXT: v_mul_i32_i24_e32 v1, s2, v1 -; GFX7-NEXT: v_mul_i32_i24_e32 v2, s15, v2 -; GFX7-NEXT: v_mul_i32_i24_e32 v3, s14, v3 -; GFX7-NEXT: v_mul_i32_i24_e32 v4, s13, v4 -; GFX7-NEXT: v_mul_i32_i24_e32 v5, s12, v5 -; GFX7-NEXT: v_mul_i32_i24_e32 v6, s11, v6 -; GFX7-NEXT: v_mul_i32_i24_e32 v7, s10, v7 -; GFX7-NEXT: v_mul_i32_i24_e32 v8, s9, v8 +; GFX7-NEXT: s_bfe_i32 s2, s0, 0x40000 +; GFX7-NEXT: s_bfe_i32 s8, s1, 0x40000 +; GFX7-NEXT: s_bfe_i32 s9, s1, 0x40004 +; GFX7-NEXT: s_bfe_i32 s10, s1, 0x40008 +; GFX7-NEXT: s_bfe_i32 s11, s1, 0x4000c +; GFX7-NEXT: s_bfe_i32 s12, s1, 0x40010 +; GFX7-NEXT: s_bfe_i32 s13, s1, 0x40014 +; GFX7-NEXT: s_bfe_i32 s14, s1, 0x40018 +; GFX7-NEXT: s_ashr_i32 s1, s1, 28 +; GFX7-NEXT: v_mov_b32_e32 v8, s8 +; GFX7-NEXT: s_bfe_i32 s15, s0, 0x40004 +; GFX7-NEXT: v_mov_b32_e32 v7, s9 +; GFX7-NEXT: s_bfe_i32 s16, s0, 0x40008 +; GFX7-NEXT: v_mov_b32_e32 v6, s10 +; GFX7-NEXT: s_bfe_i32 s17, s0, 0x4000c +; GFX7-NEXT: v_mov_b32_e32 v5, s11 +; GFX7-NEXT: s_bfe_i32 s18, s0, 0x40010 +; GFX7-NEXT: v_mov_b32_e32 v4, s12 +; GFX7-NEXT: s_bfe_i32 s19, s0, 0x40014 +; GFX7-NEXT: v_mov_b32_e32 v3, s13 +; GFX7-NEXT: s_bfe_i32 s20, s0, 0x40018 +; GFX7-NEXT: v_mov_b32_e32 v2, s14 +; GFX7-NEXT: s_ashr_i32 s0, s0, 28 +; GFX7-NEXT: v_mov_b32_e32 v1, s1 +; GFX7-NEXT: v_mul_i32_i24_e32 v1, s0, v1 +; GFX7-NEXT: v_mul_i32_i24_e32 v3, s19, v3 +; GFX7-NEXT: v_mul_i32_i24_e32 v5, s17, v5 +; GFX7-NEXT: v_mul_i32_i24_e32 v7, s15, v7 +; GFX7-NEXT: v_mul_i32_i24_e32 v2, s20, v2 +; GFX7-NEXT: s_movk_i32 s0, 0xff +; GFX7-NEXT: v_mul_i32_i24_e32 v4, s18, v4 +; GFX7-NEXT: v_mul_i32_i24_e32 v6, s16, v6 +; 
GFX7-NEXT: v_mul_i32_i24_e32 v8, s2, v8 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v1 ; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 ; GFX7-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v5 ; GFX7-NEXT: v_and_b32_e32 v6, s0, v6 -; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 ; GFX7-NEXT: v_and_b32_e32 v8, s0, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 8, v7 ; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX7-NEXT: v_or_b32_e32 v2, v4, v3 ; GFX7-NEXT: v_or_b32_e32 v3, v6, v5 +; GFX7-NEXT: s_mov_b32 s0, 0xffff ; GFX7-NEXT: v_or_b32_e32 v4, v8, v7 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX7-NEXT: v_and_b32_e32 v2, s1, v2 +; GFX7-NEXT: v_and_b32_e32 v2, s0, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX7-NEXT: v_and_b32_e32 v4, s1, v4 +; GFX7-NEXT: v_and_b32_e32 v4, s0, v4 ; GFX7-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX7-NEXT: v_or_b32_e32 v2, v4, v3 ; GFX7-NEXT: v_alignbit_b32 v3, v1, v2, 8 Index: llvm/test/CodeGen/AMDGPU/immv216.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/immv216.ll +++ llvm/test/CodeGen/AMDGPU/immv216.ll @@ -412,7 +412,7 @@ } ; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16: -; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, -1 +; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1 ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]] ; GFX9: buffer_store_dword [[REG]] @@ -429,7 +429,7 @@ } ; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16: -; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, 0xfffefffe +; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]] ; GFX9: buffer_store_dword [[REG]] @@ -446,7 +446,7 @@ } ; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16: -; GFX9: s_add_i32 [[VAL:s[0-9]+]], s4, 0xfff0fff0 +; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9+]}}, 0xfff0fff0 ; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]] ; GFX9: buffer_store_dword [[REG]] Index: llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -102,9 +102,9 @@ ; IDXMODE: v_mov_b32_e32 v1, ; IDXMODE: v_mov_b32_e32 v2, ; IDXMODE: v_mov_b32_e32 v3, -; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}} -; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} -; IDXMODE-NEXT: s_set_gpr_idx_off +; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}} +; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; IDXMODE: s_set_gpr_idx_off define amdgpu_kernel void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) { entry: %index = add i32 %offset, -512 @@ -225,10 +225,10 @@ ; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} ; MOVREL: v_movreld_b32_e32 v0, 5 -; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}} +; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], s{{[0-9]+}}, 0xfffffe00{{$}} ; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst -; IDXMODE-NEXT: v_mov_b32_e32 v0, 5 -; IDXMODE-NEXT: s_set_gpr_idx_off +; IDXMODE: v_mov_b32_e32 v0, 5 +; IDXMODE: s_set_gpr_idx_off define amdgpu_kernel void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) { entry: %index = add i32 %offset, -512 @@ -483,10 +483,10 @@ ; GCN: s_load_dword [[ARG:s[0-9]+]] ; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000 -; MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd ; MOVREL: s_waitcnt ; MOVREL: s_add_i32 m0, [[ARG]], -16 ; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, 4.0 +; 
MOVREL: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd ; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, -4.0 ; MOVREL: s_mov_b32 m0, -1 Index: llvm/test/CodeGen/AMDGPU/infinite-loop.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -73,7 +73,6 @@ ; SI-LABEL: {{^}}infinite_loops: ; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7 -; SI: s_and_b64 vcc, exec, -1 ; SI: [[LOOP1:BB[0-9]+_[0-9]+]]: ; %loop1 ; SI: s_waitcnt lgkmcnt(0) @@ -81,8 +80,8 @@ ; SI: s_cbranch_vccnz [[LOOP1]] ; SI: s_branch [[RET:BB[0-9]+_[0-9]+]] -; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378 -; SI: s_and_b64 vcc, exec, -1 +; SI-DAG: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378 +; SI-DAG: s_and_b64 vcc, exec, -1 ; SI: [[LOOP2:BB[0-9]+_[0-9]+]]: ; %loop2 ; SI: s_waitcnt lgkmcnt(0) Index: llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -11,12 +11,12 @@ ; GCN-LABEL: {{^}}insertelement_v4f32_0: ; GCN: s_load_dwordx4 +; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000 +; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]] ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000 -; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]] ; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]: define amdgpu_kernel void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind { %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0 Index: llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -277,7 +277,7 @@ ; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0 ; GCN-LABEL: {{^}}v_insertelement_v2i16_1: -; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000 +; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000 ; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 @@ -301,8 +301,8 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_1_inlineimm: -; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xfff10000 -; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] +; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfff10000 +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] @@ -363,7 +363,7 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1: -; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000 +; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000 ; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 @@ -388,8 +388,8 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1_inlineimm: -; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x230000 -; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] +; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x230000 +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] @@ -448,8 +448,8 @@ ; GFX89-DAG: s_mov_b32 
[[MASKK:s[0-9]+]], 0xffff{{$}} ; GCN-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 -; GCN: {{flat|global}}_load_dword [[IDX:v[0-9]+]] -; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[IDX:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] @@ -476,8 +476,8 @@ ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} ; GCN-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 -; GCN: {{flat|global}}_load_dword [[IDX:v[0-9]+]] -; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[IDX:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 4, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] Index: llvm/test/CodeGen/AMDGPU/kernel-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -528,8 +528,8 @@ ; EGCM: VTX_READ_16 ; EGCM: VTX_READ_16 -; SI: s_load_dwordx8 s -; SI-NEXT: s_load_dwordx2 s +; SI-DAG: s_load_dwordx8 s +; SI-DAG: s_load_dwordx2 s ; SI-NOT: {{buffer|flat|global}}_load Index: llvm/test/CodeGen/AMDGPU/known-never-snan.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/known-never-snan.ll +++ llvm/test/CodeGen/AMDGPU/known-never-snan.ll @@ -455,13 +455,13 @@ ; GCN-LABEL: v_test_known_not_snan_round_input_fmed3_r_i_i_f32: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_trunc_f32_e32 v1, v0 +; GCN-NEXT: v_sub_f32_e32 v2, v0, v1 ; GCN-NEXT: s_brev_b32 s6, -2 -; GCN-NEXT: v_trunc_f32_e32 v2, v0 -; GCN-NEXT: v_bfi_b32 v1, s6, 1.0, v0 -; GCN-NEXT: v_sub_f32_e32 v0, v0, v2 -; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, 0.5 -; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GCN-NEXT: v_add_f32_e32 v0, v2, v0 +; GCN-NEXT: v_bfi_b32 v0, s6, 1.0, v0 +; GCN-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, 0.5 +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_add_f32_e32 v0, v1, v0 ; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GCN-NEXT: s_setpc_b64 s[30:31] %known.not.snan = call float @llvm.round.f32(float %a) Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -405,10 +405,10 @@ } ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64: -; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}} +; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; CI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; CI-DAG: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}} ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -421,9 +421,9 @@ } ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64: -; GCN: 
v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; CI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}} ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}} define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -229,10 +229,10 @@ } ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64: -; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}} +; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; CI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; CI-DAG: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}} ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -246,8 +246,8 @@ ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64: ; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}} ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}} define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @@ -11,8 +11,8 @@ ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[VB]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1 @@ -26,8 +26,8 @@ ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_fabs_f32(i32 addrspace(1)* %out, [8 x i32], float 
%a, [8 x i32], i32 %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) #1 @@ -42,8 +42,8 @@ ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_fneg_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { %a.fneg = fsub float -0.0, %a @@ -58,8 +58,8 @@ ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) #1 @@ -73,8 +73,8 @@ ; SI-LABEL: {{^}}test_class_1_f32: ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 1{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 { %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 @@ -86,8 +86,8 @@ ; SI-LABEL: {{^}}test_class_64_f32: ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 64{{$}} -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 { %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1 @@ -101,8 +101,8 @@ ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}} ; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 { %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1 @@ -115,8 +115,8 @@ ; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 { %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1 @@ -188,7 +188,7 @@ ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]] ; SI: 
v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 { %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1 @@ -203,7 +203,7 @@ ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]] ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_fabs_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 { %a.fabs = call double @llvm.fabs.f64(double %a) #1 @@ -219,7 +219,7 @@ ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]] ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_fneg_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 { %a.fneg = fsub double -0.0, %a @@ -235,7 +235,7 @@ ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]] ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 { %a.fabs = call double @llvm.fabs.f64(double %a) #1 @@ -273,7 +273,7 @@ ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] ; SI-NOT: vcc ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc -; SI-NEXT: buffer_store_dword [[RESULT]] +; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define amdgpu_kernel void @test_class_full_mask_f64(i32 addrspace(1)* %out, [8 x i32], double %a) #0 { %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1 Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -17,7 +17,7 @@ ; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x94 ; GCN-DAG: s_and_b32 [[AND_I1:s[0-9]+]], 1, s{{[0-9]+}} -; GCN: v_cmp_eq_u32_e64 vcc, [[AND_I1]], 1 +; GCN-DAG: v_cmp_eq_u32_e64 vcc, [[AND_I1]], 1 ; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] ; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll @@ -19,7 +19,7 @@ ; VI-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0 ; VI-NEXT: s_nop 0 ; VI-NEXT: s_nop 0 -; VI-NEXT: v_mov_b32_dpp v2, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf +; VI-NEXT: v_mov_b32_dpp v{{[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf @0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4 define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr { bb: Index: llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -1,22 +1,68 @@ -; RUN: llc 
-amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn-- -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=VI %s declare half @llvm.fma.f16(half %a, half %b, half %c) declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) -; GCN-LABEL: {{^}}fma_f16 -; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] -; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] -; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]] -; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] -; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] -; GCN: buffer_store_short v[[R_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_f16( +; SI-LABEL: fma_f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 +; SI-NEXT: s_mov_b32 s16, s2 +; SI-NEXT: s_mov_b32 s17, s3 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: buffer_load_ushort v0, off, s[16:19], 0 +; SI-NEXT: buffer_load_ushort v1, off, s[4:7], 0 +; SI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, v0, v1, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s14, s10 +; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 +; VI-NEXT: s_mov_b32 s12, s6 +; VI-NEXT: s_mov_b32 s13, s7 +; VI-NEXT: s_mov_b32 s0, s2 +; VI-NEXT: s_mov_b32 s1, s3 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[4:7], 0 +; VI-NEXT: buffer_load_ushort v2, off, s[12:15], 0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f16 v0, v0, v1, v2 +; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b, @@ -29,20 +75,56 @@ ret void } -; 
GCN-LABEL: {{^}}fma_f16_imm_a -; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] - -; SI: s_mov_b32 s[[A_F32:[0-9]+]], 0x40400000{{$}} -; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] -; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]] -; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[B_F32:[0-9]]], s[[A_F32:[0-9]]], v[[C_F32:[0-9]]] -; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: s_movk_i32 s[[A_F16:[0-9]+]], 0x4200{{$}} -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], s[[A_F16]], v[[C_F16]] -; GCN: buffer_store_short v[[R_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_a( +; SI-LABEL: fma_f16_imm_a: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_ushort v1, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s0, 0x40400000 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_fma_f32 v0, v1, s0, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_f16_imm_a: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 +; VI-NEXT: s_movk_i32 s0, 0x4200 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f16 v0, v0, s0, v1 +; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %b, half addrspace(1)* %c) { @@ -53,19 +135,56 @@ ret void } -; GCN-LABEL: {{^}}fma_f16_imm_b -; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] -; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] -; SI: s_mov_b32 s[[B_F32:[0-9]+]], 0x40400000{{$}} -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]] -; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], s[[B_F32:[0-9]]], v[[C_F32:[0-9]]] -; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: s_movk_i32 s[[B_F16:[0-9]+]], 0x4200{{$}} -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], s[[B_F16]], v[[C_F16]] -; GCN: buffer_store_short v[[R_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_b( +; SI-LABEL: fma_f16_imm_b: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_ushort v1, off, 
s[12:15], 0 +; SI-NEXT: s_mov_b32 s0, 0x40400000 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_fma_f32 v0, v1, s0, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_f16_imm_b: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 +; VI-NEXT: s_movk_i32 s0, 0x4200 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f16 v0, v0, s0, v1 +; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %c) { @@ -76,19 +195,56 @@ ret void } -; GCN-LABEL: {{^}}fma_f16_imm_c -; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] -; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; SI: s_mov_b32 s[[C_F32:[0-9]+]], 0x40400000{{$}} -; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] -; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], s[[C_F32:[0-9]]] -; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: s_movk_i32 s[[C_F16:[0-9]+]], 0x4200{{$}} -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], s[[C_F16]] -; GCN: buffer_store_short v[[R_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_c( +; SI-LABEL: fma_f16_imm_c: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_ushort v1, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s0, 0x40400000 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_fma_f32 v0, v1, v0, s0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_f16_imm_c: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 +; VI-NEXT: s_movk_i32 s0, 0x4200 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_fma_f16 v0, v0, v1, s0 +; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm half addrspace(1)* 
%r, half addrspace(1)* %a, half addrspace(1)* %b) { @@ -99,40 +255,82 @@ ret void } -; GCN-LABEL: {{^}}fma_v2f16 -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] -; GCN: buffer_load_dword v[[C_V2_F16:[0-9]+]] - -; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; SI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] -; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] -; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] -; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]] - -; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] - - -; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]], v[[C_F32_0]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] -; SI-DAG: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]], v[[C_F32_1]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] - -; VI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; VI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] -; VI-DAG: v_fma_f16 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]], v[[C_V2_F16]] -; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], v[[C_F16_1]] - -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] -; GCN-NOT: and -; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] -; GCN: buffer_store_dword v[[R_V2_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16( +; SI-LABEL: fma_v2f16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s6, s10 +; SI-NEXT: s_mov_b32 s7, s11 +; SI-NEXT: s_mov_b32 s16, s2 +; SI-NEXT: s_mov_b32 s17, s3 +; SI-NEXT: s_mov_b32 s18, s10 +; SI-NEXT: s_mov_b32 s19, s11 +; SI-NEXT: buffer_load_dword v0, off, s[16:19], 0 +; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s8, s0 +; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: s_waitcnt vmcnt(2) +; SI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, v0, v4, v5 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_fma_f32 v1, v3, v1, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SI-NEXT: v_or_b32_e32 v0, v1, v0 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s14, s10 +; VI-NEXT: s_mov_b32 s15, s11 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s0 +; VI-NEXT: s_mov_b32 s9, s1 +; VI-NEXT: s_mov_b32 s12, s6 +; VI-NEXT: s_mov_b32 s13, s7 +; VI-NEXT: s_mov_b32 s0, s2 +; 
VI-NEXT: s_mov_b32 s1, s3 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; VI-NEXT: buffer_load_dword v2, off, s[12:15], 0 +; VI-NEXT: s_waitcnt vmcnt(2) +; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; VI-NEXT: v_fma_f16 v3, v5, v4, v3 +; VI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; VI-NEXT: v_fma_f16 v0, v0, v1, v2 +; VI-NEXT: v_or_b32_e32 v0, v0, v3 +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %b, @@ -145,39 +343,70 @@ ret void } -; GCN-LABEL: {{^}}fma_v2f16_imm_a: -; SI: buffer_load_dword v[[C_V2_F16:[0-9]+]] -; SI: buffer_load_dword v[[B_V2_F16:[0-9]+]] - - -; VI: buffer_load_dword v[[C_V2_F16:[0-9]+]] -; VI: buffer_load_dword v[[B_V2_F16:[0-9]+]] - - -; SI: s_mov_b32 s[[A_F32:[0-9]+]], 0x40400000{{$}} -; VI: s_movk_i32 s[[A_F16:[0-9]+]], 0x4200{{$}} -; GCN-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; GCN-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] - -; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]] -; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] -; SI: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] - -; SI: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], s[[A_F32]], v[[C_F32_1]] -; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[B_F32_0]], s[[A_F32]], v[[C_F32_0]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] - -; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[C_F16_1]], s[[A_F16]], v[[B_F16_1]] -; VI-DAG: v_fma_f16 v[[R_F16_0:[0-9]+]], v[[C_V2_F16]], s[[A_F16]], v[[B_V2_F16]] - -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] -; GCN-NOT: and -; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] -; GCN: buffer_store_dword v[[R_V2_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_a( +; SI-LABEL: fma_v2f16_imm_a: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s0, 0x40400000 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_fma_f32 v2, v3, s0, v2 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, v1, s0, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; SI-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_v2f16_imm_a: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 
s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; VI-NEXT: s_movk_i32 s0, 0x4200 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; VI-NEXT: v_fma_f16 v2, v3, s0, v2 +; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; VI-NEXT: v_fma_f16 v0, v1, s0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v2 +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %b, <2 x half> addrspace(1)* %c) { @@ -188,39 +417,70 @@ ret void } -; GCN-LABEL: {{^}}fma_v2f16_imm_b: -; SI: buffer_load_dword v[[C_V2_F16:[0-9]+]] -; SI: buffer_load_dword v[[A_V2_F16:[0-9]+]] - -; VI: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; VI: buffer_load_dword v[[C_V2_F16:[0-9]+]] - -; SI: s_mov_b32 s[[B_F32:[0-9]+]], 0x40400000{{$}} -; VI: s_movk_i32 s[[B_F16:[0-9]+]], 0x4200{{$}} - -; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] -; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] - -; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] -; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]] -; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], s[[B_F32]], v[[C_F32_0]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] -; SI-DAG: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], s[[B_F32]], v[[C_F32_1]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] - -; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] -; VI-DAG: v_fma_f16 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], s[[B_F16]], v[[C_V2_F16]] -; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[A_F16_1]], s[[B_F16]], v[[C_F16_1]] - -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] -; GCN-NOT: and -; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] -; GCN: buffer_store_dword v[[R_V2_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_b( +; SI-LABEL: fma_v2f16_imm_b: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s0, 0x40400000 +; SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_fma_f32 v2, v3, s0, v2 +; SI-NEXT: 
v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, v1, s0, v0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; SI-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_v2f16_imm_b: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; VI-NEXT: s_movk_i32 s0, 0x4200 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; VI-NEXT: v_fma_f16 v2, v3, s0, v2 +; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; VI-NEXT: v_fma_f16 v0, v1, s0, v0 +; VI-NEXT: v_or_b32_e32 v0, v0, v2 +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %c) { @@ -231,44 +491,70 @@ ret void } -; GCN-LABEL: {{^}}fma_v2f16_imm_c: -; SI: buffer_load_dword v[[B_V2_F16:[0-9]+]] -; SI: buffer_load_dword v[[A_V2_F16:[0-9]+]] - -; VI: buffer_load_dword v[[A_V2_F16:[0-9]+]] -; VI: buffer_load_dword v[[B_V2_F16:[0-9]+]] - -; SI: s_mov_b32 s[[C_F32:[0-9]+]], 0x40400000{{$}} -; VI: s_movk_i32 s[[C_F16:[0-9]+]], 0x4200{{$}} - -; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] - -; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] - -; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] -; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] - -; SI: v_fma_f32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]], s[[C_F32]] -; SI-DAG: v_fma_f32 v[[R_F32_0:[0-9]+]], v[[A_F32_0]], v[[B_F32_0]], s[[C_F32]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] -; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; SI: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] -; GCN-NOT: and -; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]] - -; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; VI-DAG: v_fma_f16 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]], s[[C_F16]] -; VI-DAG: v_fma_f16 v[[R_F16_1:[0-9]+]], v[[A_F16_1]], v[[B_F16_1]], s[[C_F16]] -; GCN-NOT: and -; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] - - -; GCN: buffer_store_dword v[[R_V2_F16]] -; GCN: s_endpgm define amdgpu_kernel void @fma_v2f16_imm_c( +; SI-LABEL: fma_v2f16_imm_c: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 +; SI-NEXT: s_mov_b32 s2, s10 +; SI-NEXT: s_mov_b32 s3, s11 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s12, s6 +; SI-NEXT: s_mov_b32 s13, s7 +; SI-NEXT: s_mov_b32 s14, s10 +; SI-NEXT: s_mov_b32 s15, s11 +; SI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 +; SI-NEXT: s_mov_b32 s0, 0x40400000 +; 
SI-NEXT: s_mov_b32 s8, s4 +; SI-NEXT: s_mov_b32 s9, s5 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SI-NEXT: v_fma_f32 v2, v3, v2, s0 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, v1, v0, s0 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2 +; SI-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: fma_v2f16_imm_c: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 +; VI-NEXT: s_mov_b32 s4, s6 +; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 +; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_load_dword v1, off, s[4:7], 0 +; VI-NEXT: s_movk_i32 s0, 0x4200 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 +; VI-NEXT: v_fma_f16 v2, v3, v2, s0 +; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; VI-NEXT: v_fma_f16 v0, v1, v0, s0 +; VI-NEXT: v_or_b32_e32 v0, v0, v2 +; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; VI-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, <2 x half> addrspace(1)* %b) { Index: llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -13,8 +13,8 @@ ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s2, s10 ; SI-NEXT: s_mov_b32 s3, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -40,51 +40,51 @@ ; VI-LABEL: maxnum_f16: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s11, s3 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_max_f16_e32 v0, v0, v0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_f16_e32 v1, v1, v1 ; VI-NEXT: v_max_f16_e32 v0, v0, v1 -; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: maxnum_f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], 
s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX9-NEXT: s_mov_b32 s10, -1 +; GFX9-NEXT: s_mov_b32 s11, 0xf000 +; GFX9-NEXT: s_mov_b32 s2, s10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_mov_b32 s8, s4 +; GFX9-NEXT: s_mov_b32 s9, s5 ; GFX9-NEXT: s_mov_b32 s4, s6 ; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s11, s3 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 +; GFX9-NEXT: s_mov_b32 s3, s11 +; GFX9-NEXT: s_mov_b32 s6, s10 +; GFX9-NEXT: s_mov_b32 s7, s11 ; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GFX9-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; GFX9-NEXT: buffer_load_ushort v1, off, s[0:3], 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 ; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX9-NEXT: buffer_store_short v0, off, s[8:11], 0 ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, @@ -100,35 +100,33 @@ define amdgpu_kernel void @maxnum_f16_imm_a( ; SI-LABEL: maxnum_f16_imm_a: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s6 -; SI-NEXT: s_mov_b32 s9, s7 +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; SI-NEXT: v_max_f32_e32 v0, 0x40400000, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: maxnum_f16_imm_a: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -140,14 +138,12 @@ ; ; GFX9-LABEL: maxnum_f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, s2 ; GFX9-NEXT: s_mov_b32 s7, s3 ; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -168,35 +164,33 
@@ define amdgpu_kernel void @maxnum_f16_imm_b( ; SI-LABEL: maxnum_f16_imm_b: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s6 -; SI-NEXT: s_mov_b32 s9, s7 +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; SI-NEXT: v_max_f32_e32 v0, 4.0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: maxnum_f16_imm_b: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -208,14 +202,12 @@ ; ; GFX9-LABEL: maxnum_f16_imm_b: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, s2 ; GFX9-NEXT: s_mov_b32 s7, s3 ; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -238,18 +230,18 @@ ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s6, s[6:7], 0x0 +; SI-NEXT: s_load_dword s2, s[6:7], 0x0 ; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshr_b32 s1, s6, 16 +; SI-NEXT: s_lshr_b32 s1, s2, 16 ; SI-NEXT: v_cvt_f32_f16_e32 v1, s0 ; SI-NEXT: s_lshr_b32 s0, s0, 16 ; SI-NEXT: v_cvt_f32_f16_e32 v2, s0 ; SI-NEXT: v_cvt_f32_f16_e32 v3, s1 -; SI-NEXT: v_cvt_f32_f16_e32 v0, s6 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 @@ -258,53 +250,47 @@ ; SI-NEXT: v_max_f32_e32 v0, v0, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: maxnum_v2f16: ; VI: ; %bb.0: ; 
%entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_load_dword s5, s[8:9], 0x0 +; VI-NEXT: s_load_dword s2, s[6:7], 0x0 +; VI-NEXT: s_load_dword s0, s[0:1], 0x0 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s4, s4 -; VI-NEXT: v_max_f16_e64 v0, s5, s5 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s5, s5, 16 -; VI-NEXT: v_max_f16_e32 v0, v1, v0 -; VI-NEXT: v_max_f16_e64 v1, s5, s5 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 +; VI-NEXT: s_lshr_b32 s1, s2, 16 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: s_lshr_b32 s0, s0, 16 +; VI-NEXT: v_max_f16_e32 v0, v0, v1 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: v_max_f16_e64 v2, s1, s1 ; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: maxnum_v2f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_load_dword s5, s[8:9], 0x0 +; GFX9-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v0, v1, v0 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v1, s0, s0 +; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, @@ -342,37 +328,33 @@ ; ; VI-LABEL: maxnum_v2f16_imm_a: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v2, 0x4400 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_load_dword s2, s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v0, s4, s4 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: v_max_f16_e64 v1, s4, s4 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: v_max_f16_e64 v1, s2, s2 ; VI-NEXT: v_max_f16_e32 v0, 0x4200, v0 ; VI-NEXT: v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: maxnum_v2f16_imm_a: ; 
GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x44004200 -; GFX9-NEXT: v_pk_max_f16 v0, v0, s4 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x44004200 +; GFX9-NEXT: v_pk_max_f16 v0, v0, s2 +; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, @@ -409,37 +391,33 @@ ; ; VI-LABEL: maxnum_v2f16_imm_b: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v2, 0x4200 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_load_dword s2, s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v0, s4, s4 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: v_max_f16_e64 v1, s4, s4 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: v_max_f16_e64 v1, s2, s2 ; VI-NEXT: v_max_f16_e32 v0, 4.0, v0 ; VI-NEXT: v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: maxnum_v2f16_imm_b: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x42004400 -; GFX9-NEXT: v_pk_max_f16 v0, v0, s4 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x42004400 +; GFX9-NEXT: v_pk_max_f16 v0, v0, s2 +; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, @@ -456,22 +434,21 @@ ; SI-LABEL: maxnum_v3f16: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; SI-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0 -; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshr_b32 s1, s6, 16 -; SI-NEXT: s_lshr_b32 s4, s8, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v3, s1 -; SI-NEXT: v_cvt_f32_f16_e32 v2, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v1, s6 -; SI-NEXT: v_cvt_f32_f16_e32 v5, s8 -; 
SI-NEXT: v_cvt_f32_f16_e32 v0, s7 -; SI-NEXT: v_cvt_f32_f16_e32 v4, s9 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v1, s2 +; SI-NEXT: s_lshr_b32 s2, s2, 16 +; SI-NEXT: s_lshr_b32 s3, s0, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v2, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v3, s2 +; SI-NEXT: v_cvt_f32_f16_e32 v5, s0 +; SI-NEXT: v_cvt_f32_f16_e32 v4, s1 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_max_f32_e32 v2, v3, v2 @@ -485,60 +462,55 @@ ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 -; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 -; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4 +; SI-NEXT: buffer_store_dword v1, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: maxnum_v3f16: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; VI-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s4, s4 -; VI-NEXT: v_max_f16_e64 v0, s6, s6 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s6, s6, 16 -; VI-NEXT: v_max_f16_e32 v0, v1, v0 -; VI-NEXT: v_max_f16_e64 v1, s6, s6 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: s_lshr_b32 s0, s0, 16 +; VI-NEXT: v_max_f16_e32 v0, v0, v1 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: v_max_f16_e64 v2, s2, s2 ; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_max_f16_e64 v1, s7, s7 -; VI-NEXT: v_max_f16_e64 v2, s5, s5 +; VI-NEXT: v_max_f16_e64 v1, s1, s1 +; VI-NEXT: v_max_f16_e64 v2, s3, s3 ; VI-NEXT: v_max_f16_e32 v1, v2, v1 -; VI-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: maxnum_v3f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, s6, s6 -; GFX9-NEXT: v_pk_max_f16 v0, v1, v0 -; GFX9-NEXT: v_pk_max_f16 v2, s7, s7 -; GFX9-NEXT: v_pk_max_f16 v1, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v1, 
v1, v2 -; GFX9-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v1, s0, s0 +; GFX9-NEXT: v_pk_max_f16 v2, s1, s1 +; GFX9-NEXT: v_pk_max_f16 v3, s3, s3 +; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX9-NEXT: v_pk_max_f16 v1, v3, v2 +; GFX9-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <3 x half> addrspace(1)* %r, <3 x half> addrspace(1)* %a, @@ -555,27 +527,25 @@ ; SI-LABEL: maxnum_v4f16: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; SI-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; SI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 -; SI-NEXT: s_lshr_b32 s4, s4, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v2, s4 -; SI-NEXT: s_lshr_b32 s4, s5, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v3, s4 -; SI-NEXT: s_lshr_b32 s4, s7, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v5, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v1, s5 -; SI-NEXT: s_lshr_b32 s4, s6, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v7, s7 -; SI-NEXT: v_cvt_f32_f16_e32 v6, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v4, s6 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 +; SI-NEXT: s_lshr_b32 s2, s2, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v1, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v2, s2 +; SI-NEXT: s_lshr_b32 s3, s3, 16 +; SI-NEXT: s_lshr_b32 s2, s1, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v3, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v5, s2 +; SI-NEXT: v_cvt_f32_f16_e32 v4, s0 +; SI-NEXT: s_lshr_b32 s0, s0, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v7, s1 +; SI-NEXT: v_cvt_f32_f16_e32 v6, s0 ; SI-NEXT: v_mul_f32_e32 v5, 1.0, v5 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_max_f32_e32 v3, v3, v5 @@ -596,61 +566,57 @@ ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v3 ; SI-NEXT: v_or_b32_e32 v0, v0, v2 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: maxnum_v4f16: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; VI-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s5, s5 -; VI-NEXT: v_max_f16_e64 v0, s7, s7 -; VI-NEXT: s_lshr_b32 s5, s5, 16 -; VI-NEXT: s_lshr_b32 s7, s7, 16 -; VI-NEXT: v_max_f16_e32 v0, v1, v0 -; VI-NEXT: v_max_f16_e64 v2, s5, s5 -; VI-NEXT: v_max_f16_e64 v1, s7, s7 +; VI-NEXT: v_max_f16_e64 v0, s3, s3 +; VI-NEXT: v_max_f16_e64 v1, s1, s1 +; VI-NEXT: s_lshr_b32 s3, s3, 16 +; VI-NEXT: s_lshr_b32 s1, s1, 16 +; VI-NEXT: v_max_f16_e32 v0, v0, v1 +; VI-NEXT: v_max_f16_e64 v1, s1, s1 +; VI-NEXT: v_max_f16_e64 v2, 
s3, s3 ; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_max_f16_e64 v2, s4, s4 -; VI-NEXT: v_max_f16_e64 v0, s6, s6 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s5, s6, 16 +; VI-NEXT: v_max_f16_e64 v0, s0, s0 +; VI-NEXT: v_max_f16_e64 v2, s2, s2 +; VI-NEXT: s_lshr_b32 s0, s0, 16 ; VI-NEXT: v_max_f16_e32 v0, v2, v0 -; VI-NEXT: v_max_f16_e64 v2, s5, s5 -; VI-NEXT: v_max_f16_e64 v3, s4, s4 +; VI-NEXT: v_max_f16_e64 v2, s0, s0 +; VI-NEXT: s_lshr_b32 s0, s2, 16 +; VI-NEXT: v_max_f16_e64 v3, s0, s0 ; VI-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: maxnum_v4f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v0, s7, s7 -; GFX9-NEXT: v_pk_max_f16 v1, v1, v0 -; GFX9-NEXT: v_pk_max_f16 v2, s6, s6 -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s3, s3 +; GFX9-NEXT: v_pk_max_f16 v1, s1, s1 +; GFX9-NEXT: v_pk_max_f16 v2, s0, s0 +; GFX9-NEXT: v_pk_max_f16 v3, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v1, v0, v1 +; GFX9-NEXT: v_pk_max_f16 v0, v3, v2 +; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, <4 x half> addrspace(1)* %a, @@ -666,27 +632,23 @@ define amdgpu_kernel void @fmax_v4f16_imm_a( ; SI-LABEL: fmax_v4f16_imm_a: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v1, s5 -; SI-NEXT: s_lshr_b32 s5, s5, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v2, s5 -; SI-NEXT: s_lshr_b32 s4, s4, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v3, s4 +; SI-NEXT: v_cvt_f32_f16_e32 v1, s3 +; SI-NEXT: s_lshr_b32 s3, s3, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 +; SI-NEXT: v_cvt_f32_f16_e32 v2, s3 +; SI-NEXT: s_lshr_b32 s2, s2, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v3, s2 ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_max_f32_e32 v2, 4.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_max_f32_e32 v1, 0x40400000, v1 -; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_max_f32_e32 v3, 2.0, v3 +; SI-NEXT: 
v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; SI-NEXT: v_max_f32_e32 v0, 0x41000000, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 @@ -696,52 +658,50 @@ ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; SI-NEXT: v_or_b32_e32 v0, v0, v2 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fmax_v4f16_imm_a: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x4400 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s5, s5 -; VI-NEXT: s_lshr_b32 s5, s5, 16 -; VI-NEXT: v_max_f16_e64 v3, s5, s5 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 -; VI-NEXT: v_max_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_max_f16_e32 v1, 0x4200, v1 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_max_f16_e32 v0, 0x4800, v2 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 -; VI-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NEXT: v_max_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: v_max_f16_e64 v0, s3, s3 +; VI-NEXT: v_max_f16_e64 v1, s2, s2 +; VI-NEXT: s_lshr_b32 s3, s3, 16 +; VI-NEXT: v_max_f16_e64 v2, s3, s3 +; VI-NEXT: v_max_f16_e32 v3, 0x4800, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0x4400 +; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_max_f16_e32 v0, 0x4200, v0 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: fmax_v4f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s8, 0x44004200 -; GFX9-NEXT: s_mov_b32 s9, 0x40004800 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v2, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v1, v0, s8 -; GFX9-NEXT: v_pk_max_f16 v0, v2, s9 +; GFX9-NEXT: v_pk_max_f16 v0, s3, s3 +; GFX9-NEXT: v_pk_max_f16 v2, s2, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x44004200 +; GFX9-NEXT: v_pk_max_f16 v1, v0, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x40004800 +; GFX9-NEXT: v_pk_max_f16 v0, v2, s2 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 +; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: 
buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, Index: llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -13,8 +13,8 @@ ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s10, -1 +; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s2, s10 ; SI-NEXT: s_mov_b32 s3, s11 ; SI-NEXT: s_waitcnt lgkmcnt(0) @@ -40,51 +40,51 @@ ; VI-LABEL: minnum_f16_ieee: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s10, s2 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; VI-NEXT: s_mov_b32 s10, -1 +; VI-NEXT: s_mov_b32 s11, 0xf000 +; VI-NEXT: s_mov_b32 s2, s10 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_mov_b32 s8, s4 +; VI-NEXT: s_mov_b32 s9, s5 ; VI-NEXT: s_mov_b32 s4, s6 ; VI-NEXT: s_mov_b32 s5, s7 -; VI-NEXT: s_mov_b32 s11, s3 -; VI-NEXT: s_mov_b32 s6, s2 -; VI-NEXT: s_mov_b32 s7, s3 +; VI-NEXT: s_mov_b32 s3, s11 +; VI-NEXT: s_mov_b32 s6, s10 +; VI-NEXT: s_mov_b32 s7, s11 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 ; VI-NEXT: s_waitcnt vmcnt(1) ; VI-NEXT: v_max_f16_e32 v0, v0, v0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_max_f16_e32 v1, v1, v1 ; VI-NEXT: v_min_f16_e32 v0, v0, v1 -; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_short v0, off, s[8:11], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: minnum_f16_ieee: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s10, s2 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX9-NEXT: s_mov_b32 s10, -1 +; GFX9-NEXT: s_mov_b32 s11, 0xf000 +; GFX9-NEXT: s_mov_b32 s2, s10 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_mov_b32 s8, s4 +; GFX9-NEXT: s_mov_b32 s9, s5 ; GFX9-NEXT: s_mov_b32 s4, s6 ; GFX9-NEXT: s_mov_b32 s5, s7 -; GFX9-NEXT: s_mov_b32 s11, s3 -; GFX9-NEXT: s_mov_b32 s6, s2 -; GFX9-NEXT: s_mov_b32 s7, s3 +; GFX9-NEXT: s_mov_b32 s3, s11 +; GFX9-NEXT: s_mov_b32 s6, s10 +; GFX9-NEXT: s_mov_b32 s7, s11 ; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 -; GFX9-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; GFX9-NEXT: buffer_load_ushort v1, off, s[0:3], 0 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_max_f16_e32 v1, v1, v1 ; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX9-NEXT: buffer_store_short v0, off, s[8:11], 0 ; GFX9-NEXT: s_endpgm half addrspace(1)* %r, half addrspace(1)* %a, @@ -123,35 +123,33 @@ define amdgpu_kernel void @minnum_f16_imm_a( ; SI-LABEL: minnum_f16_imm_a: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: 
s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s6 -; SI-NEXT: s_mov_b32 s9, s7 +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; SI-NEXT: v_min_f32_e32 v0, 0x40400000, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: minnum_f16_imm_a: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -163,14 +161,12 @@ ; ; GFX9-LABEL: minnum_f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, s2 ; GFX9-NEXT: s_mov_b32 s7, s3 ; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -191,35 +187,33 @@ define amdgpu_kernel void @minnum_f16_imm_b( ; SI-LABEL: minnum_f16_imm_b: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s10, s2 -; SI-NEXT: s_mov_b32 s11, s3 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s8, s6 -; SI-NEXT: s_mov_b32 s9, s7 +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; SI-NEXT: v_min_f32_e32 v0, 4.0, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: minnum_f16_imm_b: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_mov_b32 s4, s6 -; VI-NEXT: s_mov_b32 s5, s7 +; VI-NEXT: s_mov_b32 s4, s2 +; VI-NEXT: s_mov_b32 s5, s3 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s6, s2 ; VI-NEXT: s_mov_b32 s7, s3 ; VI-NEXT: 
buffer_load_ushort v0, off, s[4:7], 0 @@ -231,14 +225,12 @@ ; ; GFX9-LABEL: minnum_f16_imm_b: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_mov_b32 s4, s6 -; GFX9-NEXT: s_mov_b32 s5, s7 +; GFX9-NEXT: s_mov_b32 s4, s2 +; GFX9-NEXT: s_mov_b32 s5, s3 +; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, s2 ; GFX9-NEXT: s_mov_b32 s7, s3 ; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 @@ -261,18 +253,18 @@ ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dword s6, s[6:7], 0x0 +; SI-NEXT: s_load_dword s2, s[6:7], 0x0 ; SI-NEXT: s_load_dword s0, s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshr_b32 s1, s6, 16 +; SI-NEXT: s_lshr_b32 s1, s2, 16 ; SI-NEXT: v_cvt_f32_f16_e32 v1, s0 ; SI-NEXT: s_lshr_b32 s0, s0, 16 ; SI-NEXT: v_cvt_f32_f16_e32 v2, s0 ; SI-NEXT: v_cvt_f32_f16_e32 v3, s1 -; SI-NEXT: v_cvt_f32_f16_e32 v0, s6 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 @@ -281,53 +273,47 @@ ; SI-NEXT: v_min_f32_e32 v0, v0, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v2 ; SI-NEXT: v_or_b32_e32 v0, v0, v1 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: minnum_v2f16_ieee: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_load_dword s5, s[8:9], 0x0 +; VI-NEXT: s_load_dword s2, s[6:7], 0x0 +; VI-NEXT: s_load_dword s0, s[0:1], 0x0 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s4, s4 -; VI-NEXT: v_max_f16_e64 v0, s5, s5 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s5, s5, 16 -; VI-NEXT: v_min_f16_e32 v0, v1, v0 -; VI-NEXT: v_max_f16_e64 v1, s5, s5 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 +; VI-NEXT: s_lshr_b32 s1, s2, 16 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: s_lshr_b32 s0, s0, 16 +; VI-NEXT: v_min_f16_e32 v0, v0, v1 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: v_max_f16_e64 v2, s1, s1 ; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: minnum_v2f16_ieee: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; 
GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_load_dword s5, s[8:9], 0x0 +; GFX9-NEXT: s_load_dword s2, s[6:7], 0x0 +; GFX9-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 -; GFX9-NEXT: v_pk_min_f16 v0, v1, v0 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v1, s0, s0 +; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, <2 x half> addrspace(1)* %a, @@ -395,37 +381,33 @@ ; ; VI-LABEL: minnum_v2f16_imm_a: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v2, 0x4400 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_load_dword s2, s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v0, s4, s4 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: v_max_f16_e64 v1, s4, s4 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: v_max_f16_e64 v1, s2, s2 ; VI-NEXT: v_min_f16_e32 v0, 0x4200, v0 ; VI-NEXT: v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: minnum_v2f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x44004200 -; GFX9-NEXT: v_pk_min_f16 v0, v0, s4 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x44004200 +; GFX9-NEXT: v_pk_min_f16 v0, v0, s2 +; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, @@ -462,37 +444,33 @@ ; ; VI-LABEL: minnum_v2f16_imm_b: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: v_mov_b32_e32 v2, 0x4200 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_load_dword s4, s[6:7], 0x0 -; VI-NEXT: s_mov_b32 s1, s5 +; VI-NEXT: s_load_dword s2, s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v0, s4, s4 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: v_max_f16_e64 v1, s4, s4 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: v_max_f16_e64 v1, s2, s2 ; VI-NEXT: v_min_f16_e32 v0, 4.0, v0 ; VI-NEXT: 
v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: minnum_v2f16_imm_b: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_load_dword s4, s[6:7], 0x0 -; GFX9-NEXT: s_mov_b32 s1, s5 +; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: s_mov_b32 s4, 0x42004400 -; GFX9-NEXT: v_pk_min_f16 v0, v0, s4 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x42004400 +; GFX9-NEXT: v_pk_min_f16 v0, v0, s2 +; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <2 x half> addrspace(1)* %r, @@ -509,22 +487,21 @@ ; SI-LABEL: minnum_v3f16: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; SI-NEXT: s_load_dwordx2 s[8:9], s[8:9], 0x0 -; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_lshr_b32 s1, s6, 16 -; SI-NEXT: s_lshr_b32 s4, s8, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v3, s1 -; SI-NEXT: v_cvt_f32_f16_e32 v2, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v1, s6 -; SI-NEXT: v_cvt_f32_f16_e32 v5, s8 -; SI-NEXT: v_cvt_f32_f16_e32 v0, s7 -; SI-NEXT: v_cvt_f32_f16_e32 v4, s9 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v1, s2 +; SI-NEXT: s_lshr_b32 s2, s2, 16 +; SI-NEXT: s_lshr_b32 s3, s0, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v2, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v3, s2 +; SI-NEXT: v_cvt_f32_f16_e32 v5, s0 +; SI-NEXT: v_cvt_f32_f16_e32 v4, s1 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_min_f32_e32 v2, v3, v2 @@ -538,60 +515,55 @@ ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_or_b32_e32 v1, v1, v2 -; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 -; SI-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 offset:4 +; SI-NEXT: buffer_store_dword v1, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: minnum_v3f16: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; VI-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s4, s4 -; 
VI-NEXT: v_max_f16_e64 v0, s6, s6 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s6, s6, 16 -; VI-NEXT: v_min_f16_e32 v0, v1, v0 -; VI-NEXT: v_max_f16_e64 v1, s6, s6 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: s_lshr_b32 s0, s0, 16 +; VI-NEXT: v_min_f16_e32 v0, v0, v1 +; VI-NEXT: v_max_f16_e64 v1, s0, s0 +; VI-NEXT: v_max_f16_e64 v2, s2, s2 ; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_max_f16_e64 v1, s7, s7 -; VI-NEXT: v_max_f16_e64 v2, s5, s5 +; VI-NEXT: v_max_f16_e64 v1, s1, s1 +; VI-NEXT: v_max_f16_e64 v2, s3, s3 ; VI-NEXT: v_min_f16_e32 v1, v2, v1 -; VI-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 -; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: minnum_v3f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s4, s4 -; GFX9-NEXT: v_pk_max_f16 v0, s6, s6 -; GFX9-NEXT: v_pk_min_f16 v0, v1, v0 -; GFX9-NEXT: v_pk_max_f16 v2, s7, s7 -; GFX9-NEXT: v_pk_max_f16 v1, s5, s5 -; GFX9-NEXT: v_pk_min_f16 v1, v1, v2 -; GFX9-NEXT: buffer_store_short v1, off, s[0:3], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s2, s2 +; GFX9-NEXT: v_pk_max_f16 v1, s0, s0 +; GFX9-NEXT: v_pk_max_f16 v2, s1, s1 +; GFX9-NEXT: v_pk_max_f16 v3, s3, s3 +; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX9-NEXT: v_pk_min_f16 v1, v3, v2 +; GFX9-NEXT: buffer_store_short v1, off, s[4:7], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <3 x half> addrspace(1)* %r, <3 x half> addrspace(1)* %a, @@ -608,27 +580,25 @@ ; SI-LABEL: minnum_v4f16: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; SI-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; SI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 -; SI-NEXT: s_lshr_b32 s4, s4, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v2, s4 -; SI-NEXT: s_lshr_b32 s4, s5, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v3, s4 -; SI-NEXT: s_lshr_b32 s4, s7, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v5, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v1, s5 -; SI-NEXT: s_lshr_b32 s4, s6, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v7, s7 -; SI-NEXT: v_cvt_f32_f16_e32 
v6, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v4, s6 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 +; SI-NEXT: s_lshr_b32 s2, s2, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v1, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v2, s2 +; SI-NEXT: s_lshr_b32 s3, s3, 16 +; SI-NEXT: s_lshr_b32 s2, s1, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v3, s3 +; SI-NEXT: v_cvt_f32_f16_e32 v5, s2 +; SI-NEXT: v_cvt_f32_f16_e32 v4, s0 +; SI-NEXT: s_lshr_b32 s0, s0, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v7, s1 +; SI-NEXT: v_cvt_f32_f16_e32 v6, s0 ; SI-NEXT: v_mul_f32_e32 v5, 1.0, v5 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_min_f32_e32 v3, v3, v5 @@ -649,61 +619,57 @@ ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; SI-NEXT: v_or_b32_e32 v1, v1, v3 ; SI-NEXT: v_or_b32_e32 v0, v0, v2 -; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: minnum_v4f16: ; VI: ; %bb.0: ; %entry ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; VI-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s5, s5 -; VI-NEXT: v_max_f16_e64 v0, s7, s7 -; VI-NEXT: s_lshr_b32 s5, s5, 16 -; VI-NEXT: s_lshr_b32 s7, s7, 16 -; VI-NEXT: v_min_f16_e32 v0, v1, v0 -; VI-NEXT: v_max_f16_e64 v2, s5, s5 -; VI-NEXT: v_max_f16_e64 v1, s7, s7 +; VI-NEXT: v_max_f16_e64 v0, s3, s3 +; VI-NEXT: v_max_f16_e64 v1, s1, s1 +; VI-NEXT: s_lshr_b32 s3, s3, 16 +; VI-NEXT: s_lshr_b32 s1, s1, 16 +; VI-NEXT: v_min_f16_e32 v0, v0, v1 +; VI-NEXT: v_max_f16_e64 v1, s1, s1 +; VI-NEXT: v_max_f16_e64 v2, s3, s3 ; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_max_f16_e64 v2, s4, s4 -; VI-NEXT: v_max_f16_e64 v0, s6, s6 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: s_lshr_b32 s5, s6, 16 +; VI-NEXT: v_max_f16_e64 v0, s0, s0 +; VI-NEXT: v_max_f16_e64 v2, s2, s2 +; VI-NEXT: s_lshr_b32 s0, s0, 16 ; VI-NEXT: v_min_f16_e32 v0, v2, v0 -; VI-NEXT: v_max_f16_e64 v2, s5, s5 -; VI-NEXT: v_max_f16_e64 v3, s4, s4 +; VI-NEXT: v_max_f16_e64 v2, s0, s0 +; VI-NEXT: s_lshr_b32 s0, s2, 16 +; VI-NEXT: v_max_f16_e64 v3, s0, s0 ; VI-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: minnum_v4f16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 +; GFX9-NEXT: s_mov_b32 s7, 0xf000 +; GFX9-NEXT: s_mov_b32 s6, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v1, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v0, s7, s7 -; GFX9-NEXT: v_pk_min_f16 v1, v1, v0 -; GFX9-NEXT: v_pk_max_f16 v2, s6, s6 -; GFX9-NEXT: v_pk_max_f16 v0, s4, s4 -; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX9-NEXT: v_pk_max_f16 v0, s3, s3 +; GFX9-NEXT: v_pk_max_f16 v1, s1, s1 +; GFX9-NEXT: v_pk_max_f16 v2, s0, s0 +; GFX9-NEXT: v_pk_max_f16 v3, s2, s2 +; GFX9-NEXT: v_pk_min_f16 v1, v0, v1 +; GFX9-NEXT: v_pk_min_f16 v0, v3, v2 +; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, <4 x half> addrspace(1)* %a, @@ -719,27 +685,23 @@ define amdgpu_kernel void @fmin_v4f16_imm_a( ; SI-LABEL: fmin_v4f16_imm_a: ; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_mov_b32 s0, s4 -; SI-NEXT: s_mov_b32 s1, s5 -; SI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: v_cvt_f32_f16_e32 v1, s5 -; SI-NEXT: s_lshr_b32 s5, s5, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v0, s4 -; SI-NEXT: v_cvt_f32_f16_e32 v2, s5 -; SI-NEXT: s_lshr_b32 s4, s4, 16 -; SI-NEXT: v_cvt_f32_f16_e32 v3, s4 +; SI-NEXT: v_cvt_f32_f16_e32 v1, s3 +; SI-NEXT: s_lshr_b32 s3, s3, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 +; SI-NEXT: v_cvt_f32_f16_e32 v2, s3 +; SI-NEXT: s_lshr_b32 s2, s2, 16 +; SI-NEXT: v_cvt_f32_f16_e32 v3, s2 ; SI-NEXT: v_mul_f32_e32 v1, 1.0, v1 ; SI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; SI-NEXT: v_min_f32_e32 v2, 4.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_min_f32_e32 v1, 0x40400000, v1 -; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_min_f32_e32 v3, 2.0, v3 +; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 ; SI-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; SI-NEXT: v_min_f32_e32 v0, 0x41000000, v0 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 @@ -749,52 +711,50 @@ ; SI-NEXT: v_or_b32_e32 v1, v1, v2 ; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; SI-NEXT: v_or_b32_e32 v0, v0, v2 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; ; VI-LABEL: fmin_v4f16_imm_a: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v0, 0x4400 -; VI-NEXT: s_mov_b32 s3, 0xf000 -; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: s_mov_b32 s0, s4 -; VI-NEXT: s_mov_b32 s1, s5 -; VI-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; VI-NEXT: s_waitcnt lgkmcnt(0) -; VI-NEXT: v_max_f16_e64 v1, s5, s5 -; VI-NEXT: s_lshr_b32 s5, s5, 16 -; VI-NEXT: v_max_f16_e64 v3, s5, s5 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 -; VI-NEXT: v_min_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_min_f16_e32 v1, 0x4200, v1 -; VI-NEXT: s_lshr_b32 s4, s4, 16 -; VI-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_min_f16_e32 v0, 0x4800, v2 -; VI-NEXT: v_max_f16_e64 v2, s4, s4 -; VI-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NEXT: v_min_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: v_max_f16_e64 v0, s3, s3 +; VI-NEXT: v_max_f16_e64 v1, s2, s2 +; VI-NEXT: s_lshr_b32 s3, s3, 16 +; VI-NEXT: v_max_f16_e64 v2, s3, s3 +; VI-NEXT: v_min_f16_e32 v3, 0x4800, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0x4400 +; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_min_f16_e32 v0, 0x4200, v0 +; VI-NEXT: s_lshr_b32 s2, s2, 16 +; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: v_max_f16_e64 v0, s2, s2 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: fmin_v4f16_imm_a: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; GFX9-NEXT: s_mov_b32 s8, 0x44004200 -; GFX9-NEXT: s_mov_b32 s9, 0x40004800 -; GFX9-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s0, s4 -; GFX9-NEXT: s_mov_b32 s1, s5 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, s5, s5 -; GFX9-NEXT: v_pk_max_f16 v2, s4, s4 -; GFX9-NEXT: v_pk_min_f16 v1, v0, s8 -; GFX9-NEXT: v_pk_min_f16 v0, v2, s9 +; GFX9-NEXT: v_pk_max_f16 v0, s3, s3 +; GFX9-NEXT: v_pk_max_f16 v2, s2, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x44004200 +; GFX9-NEXT: v_pk_min_f16 v1, v0, s2 +; GFX9-NEXT: s_mov_b32 s2, 0x40004800 +; GFX9-NEXT: v_pk_min_f16 v0, v2, s2 +; GFX9-NEXT: s_mov_b32 s3, 0xf000 +; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm <4 x half> addrspace(1)* %r, Index: llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll +++ llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -137,7 +137,7 @@ ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 ; GCN-NOT: 0xffff ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NEXT: buffer_store_dword [[VVAL]] +; GCN: buffer_store_dword [[VVAL]] define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) { entry: %size = call i32 @llvm.r600.read.local.size.x() #0 @@ -152,7 +152,7 @@ ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c ; GCN-NOT: 0xffff ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NEXT: buffer_store_dword [[VVAL]] +; GCN: buffer_store_dword [[VVAL]] define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) { entry: %size = call i32 @llvm.r600.read.local.size.y() #0 @@ -167,7 +167,7 @@ ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 ; GCN-NOT: 0xffff ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN-NEXT: buffer_store_dword [[VVAL]] +; GCN: buffer_store_dword [[VVAL]] define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) { entry: %size = call i32 @llvm.r600.read.local.size.z() #0 Index: llvm/test/CodeGen/AMDGPU/llvm.round.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.round.ll +++ llvm/test/CodeGen/AMDGPU/llvm.round.ll @@ 
-64,9 +64,9 @@ ; GFX89-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7fff{{$}} ; GFX89-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]] ; GFX89-DAG: v_mov_b32_e32 [[BFI_K:v[0-9]+]], 0x3c00 -; GFX89: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], [[BFI_K]], [[VX]] +; GFX89-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], [[BFI_K]], [[VX]] -; GFX89: v_trunc_f16_e32 [[TRUNC:v[0-9]+]], [[SX]] +; GFX89-DAG: v_trunc_f16_e32 [[TRUNC:v[0-9]+]], [[SX]] ; GFX89: v_sub_f16_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]] ; GFX89: v_cmp_ge_f16_e64 vcc, |[[SUB]]|, 0.5 ; GFX89: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[COPYSIGN]] Index: llvm/test/CodeGen/AMDGPU/local-atomics64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/local-atomics64.ll +++ llvm/test/CodeGen/AMDGPU/local-atomics64.ll @@ -364,7 +364,7 @@ ; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24 ; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9 ; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; GCN: ds_add_u64 {{v[0-9]+}}, v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { Index: llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir +++ llvm/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir @@ -28,9 +28,9 @@ # GCN-LABEL: name: interleave_add64s # GCN: dead %8:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %12:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %4, %5, %9, implicit $exec -# GCN-NEXT: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_I32_e64 %2, %3, implicit $exec -# GCN-NEXT: dead %14:vgpr_32, dead %15:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %11, implicit $exec +# GCN: dead %12:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %4, %5, %9, implicit $exec +# GCN: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_I32_e64 %2, %3, implicit $exec +# GCN: dead %14:vgpr_32, dead %15:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %11, implicit $exec name: interleave_add64s registers: - { id: 0, class: vgpr_32 } @@ -71,8 +71,8 @@ # GCN-LABEL: name: cluster_mov_addc # GCN: S_NOP 0, implicit-def $vcc -# GCN-NEXT: %2:sreg_64_xexec = S_MOV_B64 0 -# GCN-NEXT: dead %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 %0, %1, %2, implicit $exec +# GCN: %2:sreg_64_xexec = S_MOV_B64 0 +# GCN: dead %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 %0, %1, %2, implicit $exec name: cluster_mov_addc registers: - { id: 0, class: vgpr_32 } @@ -93,10 +93,10 @@ ... 
# GCN-LABEL: name: no_cluster_add_addc_diff_sgpr +# GCN: S_NOP 0, implicit-def $vcc # GCN: dead %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit $exec # GCN-NEXT: %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec # GCN-NEXT: %7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec -# GCN-NEXT: S_NOP 0, implicit-def $vcc # GCN-NEXT: %8:sreg_64_xexec = S_MOV_B64 0 # GCN-NEXT: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %8, implicit $exec name: no_cluster_add_addc_diff_sgpr @@ -149,8 +149,8 @@ # GCN-LABEL: name: cluster_cmp_cndmask # GCN: S_NOP 0, implicit-def $vcc -# GCN-NEXT: %3:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %4:vgpr_32 = V_CNDMASK_B32_e64 %0, %1, %3, implicit $exec +# GCN: %3:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec +# GCN: dead %4:vgpr_32 = V_CNDMASK_B32_e64 %0, %1, %3, implicit $exec name: cluster_cmp_cndmask registers: - { id: 0, class: vgpr_32 } @@ -173,8 +173,8 @@ # GCN-LABEL: name: cluster_multi_use_cmp_cndmask # GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec -# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec -# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec +# GCN: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec +# GCN: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec name: cluster_multi_use_cmp_cndmask registers: - { id: 0, class: vgpr_32 } Index: llvm/test/CodeGen/AMDGPU/madak.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/madak.ll +++ llvm/test/CodeGen/AMDGPU/madak.ll @@ -31,13 +31,13 @@ ; it. ; GCN-LABEL: {{^}}madak_2_use_f32: -; GFX8_9: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 +; GFX8_9-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 ; GFX6-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GFX6-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GFX6-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 -; GFX8_9: {{flat|global}}_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} -; GFX8_9: {{flat|global}}_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} -; GFX8_9: {{flat|global}}_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} +; GFX8_9-DAG: {{flat|global}}_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} +; GFX8_9-DAG: {{flat|global}}_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} +; GFX8_9-DAG: {{flat|global}}_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}} ; GFX6-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 ; GCN-DAG: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 ; GCN-DAG: v_mac_f32_e32 [[VK]], [[VA]], [[VC]] Index: llvm/test/CodeGen/AMDGPU/merge-store-crash.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/merge-store-crash.ll +++ llvm/test/CodeGen/AMDGPU/merge-store-crash.ll @@ -7,9 +7,9 @@ @tess_lds = external addrspace(3) global [8192 x i32] ; CHECK-LABEL: {{^}}main: -; CHECK: ds_write2_b32 -; CHECK: v_mov_b32_e32 v1, v0 -; CHECK: tbuffer_store_format_xyzw v[0:3], +; CHECK-DAG: ds_write2_b32 +; CHECK-DAG: v_mov_b32_e32 v1, v0 +; CHECK-DAG: tbuffer_store_format_xyzw v[0:3], define amdgpu_vs void @main(i32 inreg %arg) { main_body: %tmp = load float, float addrspace(3)* undef, align 4 Index: llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll =================================================================== --- 
llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll +++ llvm/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll @@ -9,14 +9,11 @@ ; GCN: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN-NOT: v_mov_b32 -; GCN: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]] -; GCN: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}} -; GCN-NOT: v_mov_b32 -; GCN: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]] -; GCN-NOT: v_mov_b32 - -; GCN: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]] -; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]] +; GCN-DAG: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]] +; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]] +; GCN-DAG: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]] +; GCN-DAG: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]] ; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, define amdgpu_kernel void @clobber_vgpr_pair_pointer_add(i64 %arg1, [8 x i32], i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 { Index: llvm/test/CodeGen/AMDGPU/mul.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mul.ll +++ llvm/test/CodeGen/AMDGPU/mul.ll @@ -223,16 +223,15 @@ ; VI: s_mul_i32 ; VI: v_mul_hi_u32 ; VI: v_mul_hi_u32 -; VI: s_mul_i32 ; VI: v_mad_u64_u32 ; VI: s_mul_i32 ; VI: v_mad_u64_u32 ; VI: s_mul_i32 ; VI: s_mul_i32 +; VI: s_mul_i32 ; VI: v_mad_u64_u32 ; VI: s_mul_i32 - ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @s_mul_i128(i128 addrspace(1)* %out, [8 x i32], i128 %a, [8 x i32], i128 %b) nounwind #0 { %mul = mul i128 %a, %b Index: llvm/test/CodeGen/AMDGPU/mul_int24.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mul_int24.ll +++ llvm/test/CodeGen/AMDGPU/mul_int24.ll @@ -28,7 +28,7 @@ ; GCN-NOT: bfe ; GCN-NOT: ashr ; GCN: v_mul_hi_i32_i24_e32 [[RESULT:v[0-9]+]], -; GCN-NEXT: buffer_store_dword [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] ; EG: ASHR ; EG: ASHR @@ -131,8 +131,8 @@ ; SI-NOT: bfe ; SI: v_mul_hi_i32_i24_e32 v[[MUL_HI:[0-9]+]], -; SI-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] -; SI-NEXT: buffer_store_dword v[[HI]] +; SI: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] +; SI: buffer_store_dword v[[HI]] define amdgpu_kernel void @test_smulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) { entry: %tmp0 = shl i33 %a, 9 Index: llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll +++ llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll @@ -101,7 +101,7 @@ ; FUNC-LABEL: {{^}}test_umulhi24_i32_i64: ; GCN-NOT: and ; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]], -; GCN-NEXT: buffer_store_dword [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: %a.24 = and i32 %a, 16777215 @@ -118,7 +118,7 @@ ; FUNC-LABEL: {{^}}test_umulhi24: ; GCN-NOT: and ; GCN: v_mul_hi_u32_u24_e32 [[RESULT:v[0-9]+]], -; GCN-NEXT: buffer_store_dword [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) { entry: %a.24 = and i64 %a, 16777215 @@ -205,8 +205,8 @@ ; GCN-NOT: and ; GCN-NOT: lshr ; GCN: v_mul_hi_u32_u24_e32 v[[MUL_HI:[0-9]+]], -; 
GCN-NEXT: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] -; GCN-NEXT: buffer_store_dword v[[HI]] +; GCN: v_and_b32_e32 v[[HI:[0-9]+]], 1, v[[MUL_HI]] +; GCN: buffer_store_dword v[[HI]] define amdgpu_kernel void @test_umulhi24_i33(i32 addrspace(1)* %out, i33 %a, i33 %b) { entry: %tmp0 = shl i33 %a, 9 Index: llvm/test/CodeGen/AMDGPU/or.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/or.ll +++ llvm/test/CodeGen/AMDGPU/or.ll @@ -95,11 +95,8 @@ ; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0x13|0x4c}} ; SI-NOT: or_b32 ; SI: s_or_b32 s[[VAL_LO]], s[[VAL_LO]], 63 -; SI-NOT: or_b32 -; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[VAL_LO]] -; SI-NOT: or_b32 -; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]] -; SI-NOT: or_b32 +; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[VAL_LO]] +; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]] ; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) { %or = or i64 %a, 63 Index: llvm/test/CodeGen/AMDGPU/permute.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/permute.ll +++ llvm/test/CodeGen/AMDGPU/permute.ll @@ -156,8 +156,8 @@ ; GCN-LABEL: {{^}}known_ffff0500: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} +; GCN-DAG: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] +; GCN-DAG: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) { bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -178,8 +178,8 @@ ; GCN-LABEL: {{^}}known_050c0c00: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}} -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} +; GCN-DAG: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] +; GCN-DAG: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) { bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -199,8 +199,8 @@ ; GCN-LABEL: {{^}}known_ffff8004: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 -; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] -; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} +; GCN-DAG: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] +; GCN-DAG: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) { bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() Index: llvm/test/CodeGen/AMDGPU/ret.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ret.ll +++ llvm/test/CodeGen/AMDGPU/ret.ll @@ -17,14 +17,13 @@ } ; GCN-LABEL: {{^}}vgpr_literal: -; GCN: v_mov_b32_e32 v4, v0 -; GCN: exp mrt0 v4, v4, v4, v4 done vm +; GCN: exp mrt0 v0, v0, v0, v0 done vm ; GCN-DAG: v_mov_b32_e32 v0, 1.0 ; GCN-DAG: v_mov_b32_e32 v1, 2.0 ; GCN-DAG: v_mov_b32_e32 v2, 4.0 ; GCN-DAG: v_mov_b32_e32 v3, -1.0 -; GCN: s_waitcnt expcnt(0) +; GCN-DAG: s_waitcnt expcnt(0) ; GCN-NOT: s_endpgm define amdgpu_vs { float, float, float, float } 
@vgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: @@ -226,15 +225,14 @@ } ; GCN-LABEL: {{^}}structure_literal: -; GCN: v_mov_b32_e32 v3, v0 -; GCN: exp mrt0 v3, v3, v3, v3 done vm +; GCN: exp mrt0 v0, v0, v0, v0 done vm ; GCN-DAG: v_mov_b32_e32 v0, 1.0 ; GCN-DAG: s_mov_b32 s0, 2 ; GCN-DAG: s_mov_b32 s1, 3 ; GCN-DAG: v_mov_b32_e32 v1, 2.0 ; GCN-DAG: v_mov_b32_e32 v2, 4.0 -; GCN: s_waitcnt expcnt(0) +; GCN-DAG: s_waitcnt expcnt(0) define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 Index: llvm/test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -170,12 +170,13 @@ ; CI. ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: -; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} +; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} +; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} +; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} ; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} ; GCN-NOHSA-NOT: v_add -; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} -; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} + ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} Index: llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll +++ llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll @@ -136,9 +136,9 @@ ; FIXME: fabs should fold away ; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s +; GCN-DAG: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s ; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]] define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 { %x = load volatile float, float addrspace(1)* undef @@ -151,9 +151,9 @@ } ; GCN-LABEL: {{^}}add_select_posk_posk_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s +; GCN-DAG: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]] define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 { %x = load volatile float, float addrspace(1)* undef @@ -184,8 +184,8 @@ } ; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: buffer_load_dword [[Y:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000 ; GCN-DAG: v_and_b32_e32 [[FABS_X:v[0-9]+]], 0x7fffffff, [[X]] @@ -372,9 +372,9 @@ } ; GCN-LABEL: 
{{^}}add_select_fneg_inv2pi_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: buffer_load_dword [[Y:v[0-9]+]] -; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]] @@ -390,9 +390,9 @@ } ; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] -; GCN: buffer_load_dword [[Y:v[0-9]+]] -; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]] +; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983 ; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc ; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc @@ -410,10 +410,10 @@ } ; GCN-LABEL: {{^}}add_select_negk_negk_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_cmp_eq_u32_e64 -; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s +; GCN-DAG: v_cmp_eq_u32_e64 +; GCN-DAG: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s ; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]] define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 { %x = load volatile float, float addrspace(1)* undef @@ -442,9 +442,9 @@ } ; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32: -; GCN: buffer_load_dword [[X:v[0-9]+]] +; GCN-DAG: buffer_load_dword [[X:v[0-9]+]] -; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s +; GCN-DAG: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s ; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[X]], [[SELECT]] define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 { %x = load volatile float, float addrspace(1)* undef Index: llvm/test/CodeGen/AMDGPU/select-i1.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/select-i1.ll +++ llvm/test/CodeGen/AMDGPU/select-i1.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN0 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN1 %s ; FIXME: This should go in existing select.ll test, except the current testcase there is broken on GCN @@ -13,16 +13,33 @@ ret void } -; GCN-LABEL: {{^}}s_minmax_i1: -; GCN: s_load_dword [[LOAD:s[0-9]+]], -; GCN-DAG: s_lshr_b32 [[A:s[0-9]+]], [[LOAD]], 8 -; GCN-DAG: s_lshr_b32 [[B:s[0-9]+]], [[LOAD]], 16 -; GCN-DAG: s_and_b32 [[COND:s[0-9]+]], 1, [[LOAD]] -; GCN: v_mov_b32_e32 [[V_B:v[0-9]+]], [[B]] -; GCN: v_mov_b32_e32 [[V_A:v[0-9]+]], [[A]] -; GCN: v_cmp_eq_u32_e64 vcc, [[COND]], 1 -; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], [[V_B]], [[V_A]] -; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, [[SEL]] +; FIXME: The GCNT should be sufficient here. 
+ +; GCN0-LABEL: {{^}}s_minmax_i1: +; GCN0: s_load_dword [[LOAD:s[0-9]+]], +; GCN0-DAG: s_lshr_b32 [[A:s[0-9]+]], [[LOAD]], 8 +; GCN0-DAG: s_lshr_b32 [[B:s[0-9]+]], [[LOAD]], 16 +; GCN0-DAG: s_and_b32 [[COND:s[0-9]+]], 1, [[LOAD]] +; GCN0: v_mov_b32_e32 [[V_B:v[0-9]+]], [[B]] +; GCN0: v_mov_b32_e32 [[V_A:v[0-9]+]], [[A]] +; GCN0-DAG: v_cmp_eq_u32_e64 vcc, [[COND]], 1 +; GCN0: v_cndmask_b32_e32 [[SEL:v[0-9]+]], [[V_B]], [[V_A]] +; GCN0: v_and_b32_e32 v{{[0-9]+}}, 1, [[SEL]] + +; GCN1-LABEL: {{^}}s_minmax_i1: +; GCN1: s_load_dword [[LOAD:s[0-9]+]], +; GCN1-DAG: s_lshr_b32 [[A:s[0-9]+]], [[LOAD]], 8 +; GCN1-DAG: s_lshr_b32 [[B:s[0-9]+]], [[LOAD]], 16 +; GCN1-DAG: s_and_b32 [[COND:s[0-9]+]], 1, [[LOAD]] +; GCN1-DAG: v_mov_b32_e32 [[V_A:v[0-9]+]], [[A]] +; GCN1-DAG: v_mov_b32_e32 [[V_B:v[0-9]+]], [[B]] +; GCN1-DAG: v_cmp_eq_u32_e64 vcc, [[COND]], 1 +; GCN1: v_cndmask_b32_e32 [[SEL:v[0-9]+]], [[V_B]], [[V_A]] +; GCN1: v_and_b32_e32 v{{[0-9]+}}, 1, [[SEL]] + + + + define amdgpu_kernel void @s_minmax_i1(i1 addrspace(1)* %out, [8 x i32], i1 zeroext %cond, i1 zeroext %a, i1 zeroext %b) nounwind { %cmp = icmp slt i1 %cond, false %sel = select i1 %cmp, i1 %a, i1 %b Index: llvm/test/CodeGen/AMDGPU/select.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/select.f16.ll +++ llvm/test/CodeGen/AMDGPU/select.f16.ll @@ -200,15 +200,16 @@ ; SI: v_cmp_lt_f32_e32 ; SI: v_cndmask_b32_e32 ; SI: v_cmp_lt_f32_e32 vcc, 0.5 +; SI: v_cvt_f16_f32_e32 ; SI: v_cndmask_b32_e32 +; SI: v_cvt_f16_f32_e32 + ; VI: v_cmp_lt_f16_e32 ; VI: v_cndmask_b32_e32 ; VI: v_cmp_lt_f16_e32 ; VI: v_cndmask_b32_e32 -; SI: v_cvt_f16_f32_e32 -; SI: v_cvt_f16_f32_e32 ; GCN: s_endpgm define amdgpu_kernel void @select_v2f16_imm_a( <2 x half> addrspace(1)* %r, @@ -236,15 +237,16 @@ ; SI: v_cmp_gt_f32_e32 ; SI: v_cndmask_b32_e32 ; SI: v_cmp_gt_f32_e32 vcc, 0.5 +; SI: v_cvt_f16_f32_e32 ; SI: v_cndmask_b32_e32 +; SI: v_cvt_f16_f32_e32 ; VI: v_cmp_gt_f16_e32 ; VI: v_cndmask_b32_e32 ; VI: v_cmp_gt_f16_e32 ; VI: v_cndmask_b32_e32 -; SI: v_cvt_f16_f32_e32 -; SI: v_cvt_f16_f32_e32 + ; GCN: s_endpgm define amdgpu_kernel void @select_v2f16_imm_b( <2 x half> addrspace(1)* %r, Index: llvm/test/CodeGen/AMDGPU/setcc-opt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/setcc-opt.ll +++ llvm/test/CodeGen/AMDGPU/setcc-opt.ll @@ -5,9 +5,9 @@ ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0: ; GCN-NOT: v_cmp ; GCN: v_cmp_ne_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT:buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN:buffer_store_byte [[RESULT]] +; GCN: s_endpgm ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1 @@ -22,9 +22,9 @@ ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0: ; GCN-NOT: v_cmp ; GCN: v_cmp_ne_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm ; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W ; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1 @@ -39,9 +39,9 @@ ; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1: ; GCN-NOT: v_cmp ; GCN: v_cmp_eq_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 
[[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp eq i32 %a, %b %ext = sext i1 %icmp0 to i32 @@ -53,9 +53,9 @@ ; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1: ; GCN-NOT: v_cmp ; GCN: v_cmp_eq_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = sext i1 %icmp0 to i32 @@ -67,9 +67,9 @@ ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0: ; GCN-NOT: v_cmp ; GCN: v_cmp_ne_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp eq i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -81,9 +81,9 @@ ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0: ; GCN-NOT: v_cmp ; GCN: v_cmp_ne_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -95,9 +95,9 @@ ; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1: ; GCN-NOT: v_cmp ; GCN: v_cmp_eq_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] -; GCN-NEXT: s_endpgm +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] +; GCN: s_endpgm define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp eq i32 %a, %b %ext = zext i1 %icmp0 to i32 @@ -109,8 +109,8 @@ ; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1: ; GCN-NOT: v_cmp ; GCN: v_cmp_eq_u32_e32 vcc, -; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc +; GCN: buffer_store_byte [[RESULT]] define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %icmp0 = icmp ne i32 %a, %b %ext = zext i1 %icmp0 to i32 Index: llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll +++ llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll @@ -27,9 +27,9 @@ ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: buffer_store_dwordx4 
v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} @@ -72,9 +72,9 @@ ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} Index: llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll +++ llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll @@ -23,9 +23,9 @@ ; Extract the high bit of the high half ; GCN-LABEL: {{^}}v_uextract_bit_63_i64: ; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} -; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] -; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -89,9 +89,9 @@ ; GCN-LABEL: {{^}}v_uextract_bit_33_i64: ; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} -; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}} -; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -173,9 +173,9 @@ ; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64: ; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} -; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2 -; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -208,9 +208,9 @@ ; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64: ; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} -; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30 -; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30 ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}} define amdgpu_kernel void 
@v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -351,7 +351,7 @@ ; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64: ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO_SHR:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO_BFE:[0-9]+]], v[[ZERO_SHR]] +; GCN-DAG: v_mov_b32_e32 v[[ZERO_BFE:[0-9]+]], v[[ZERO_SHR]] ; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]] ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO_SHR]]{{\]}} Index: llvm/test/CodeGen/AMDGPU/shift-i128.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shift-i128.ll +++ llvm/test/CodeGen/AMDGPU/shift-i128.ll @@ -147,18 +147,18 @@ ; GCN-LABEL: v_lshr_i128_kv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: v_mov_b32_e32 v3, 0 ; GCN-NEXT: s_mov_b32 s7, 0 ; GCN-NEXT: s_movk_i32 s6, 0x41 -; GCN-NEXT: v_mov_b32_e32 v3, 0x41 -; GCN-NEXT: v_lshr_b64 v[1:2], s[6:7], v0 +; GCN-NEXT: v_lshr_b64 v[4:5], s[6:7], v0 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0 -; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; GCN-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc -; GCN-NEXT: v_mov_b32_e32 v2, 0 -; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v0, 0x41 +; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] %shl = lshr i128 65, %rhs ret i128 %shl Index: llvm/test/CodeGen/AMDGPU/shl.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shl.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -12,20 +12,20 @@ ; VI: s_load_dword s ; VI: s_lshr_b32 ; VI: s_lshr_b32 +; VI: s_lshl_b32 ; VI: s_and_b32 ; VI: s_and_b32 ; VI: s_lshl_b32 ; VI: s_lshl_b32 -; VI: s_lshl_b32 ; VI: s_and_b32 ; VI: s_or_b32 ; CI: s_load_dword s ; CI: s_load_dword s ; CI: s_lshr_b32 -; CI: s_and_b32 ; CI: s_lshr_b32 ; CI: s_lshl_b32 +; CI: s_and_b32 ; CI: s_lshl_b32 ; CI: s_lshl_b32 ; CI: s_and_b32 Index: llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll +++ llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -34,8 +34,8 @@ ; GCN-LABEL: {{^}}load_shl_base_lds_1: ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} -; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 ; GCN: v_add_{{[iu]}}32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}} +; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 ; GCN-DAG: buffer_store_dword [[RESULT]] ; GCN-DAG: buffer_store_dword [[ADDUSE]] ; GCN: s_endpgm Index: llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -11,7 +11,7 @@ ; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:16 ; SGPR-NEXT: s_waitcnt vmcnt(0) ; SGPR-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]] -; SGPR-NEXT: s_nop 4 +; SGPR: s_nop 2 ; SGPR-NEXT: buffer_store_dword v0, off, 
s[0:[[HI]]{{\]}}, 0 ; Make sure scratch wave offset register is correctly incremented and Index: llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -148,9 +148,9 @@ ; GCN: s_load_dword ; GCN: s_load_dword ; GCN: s_load_dword +; GFX9: global_store_dword ; GCN: ds_write_b32 ; CI: buffer_store_dword -; GFX9: global_store_dword define amdgpu_kernel void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(4)* %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 1 %ptr2 = getelementptr inbounds i32, i32 addrspace(4)* %ptr0, i64 2 @@ -190,10 +190,10 @@ ; GCN-LABEL: {{^}}reorder_local_offsets: ; GCN: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102 +; GFX9: global_store_dword ; GCN-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100 ; GCN-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 ; CI: buffer_store_dword -; GFX9: global_store_dword ; GCN: s_endpgm define amdgpu_kernel void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 @@ -252,13 +252,11 @@ ; CI-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:28{{$}} ; CI-NEXT: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:44{{$}} -; CI: v_mov_b32 -; CI: v_mov_b32 - -; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} - -; CI: v_add_i32 -; CI: v_add_i32 +; CI-DAG: v_mov_b32 +; CI-DAG: v_mov_b32 +; CI-DAG: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: v_add_i32 +; CI-DAG: v_add_i32 ; CI: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}} Index: llvm/test/CodeGen/AMDGPU/sign_extend.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sign_extend.ll +++ llvm/test/CodeGen/AMDGPU/sign_extend.ll @@ -65,7 +65,7 @@ ; GCN-LABEL: {{^}}s_sext_i1_to_i16: ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1 -; GCN-NEXT: buffer_store_short [[RESULT]] +; GCN: buffer_store_short [[RESULT]] define amdgpu_kernel void @s_sext_i1_to_i16(i16 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp eq i32 %a, %b %sext = sext i1 %cmp to i16 @@ -79,7 +79,7 @@ ; is optimized to a select very early. 
; GCN-LABEL: {{^}}s_sext_i1_to_i16_with_and: ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1 -; GCN-NEXT: buffer_store_short [[RESULT]] +; GCN: buffer_store_short [[RESULT]] define amdgpu_kernel void @s_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) nounwind { %cmp0 = icmp eq i32 %a, %b %cmp1 = icmp eq i32 %c, %d @@ -91,7 +91,7 @@ ; GCN-LABEL: {{^}}v_sext_i1_to_i16_with_and: ; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1 -; GCN-NEXT: buffer_store_short [[RESULT]] +; GCN: buffer_store_short [[RESULT]] define amdgpu_kernel void @v_sext_i1_to_i16_with_and(i16 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %cmp0 = icmp eq i32 %a, %tid Index: llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -32,12 +32,12 @@ ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 -; VI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; VI: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}} ; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_and_b32 Index: llvm/test/CodeGen/AMDGPU/sub.i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sub.i16.ll +++ llvm/test/CodeGen/AMDGPU/sub.i16.ll @@ -87,7 +87,7 @@ ; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64: ; VI: flat_load_ushort [[A:v[0-9]+]] ; VI: flat_load_ushort [[B:v[0-9]+]] -; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 +; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 ; VI-DAG: v_sub_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]] ; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { Index: llvm/test/CodeGen/AMDGPU/sub.v2i16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sub.v2i16.ll +++ llvm/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -144,8 +144,8 @@ ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; VI-NOT: v_subrev_i16 -; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080 ; VI: flat_load_dword +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080 ; VI: v_add_u16_sdwa [[ADD:v[0-9]+]], v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NOT: v_subrev_i16 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[ADD]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD Index: llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -4,17 +4,17 @@ target triple="amdgcn--" ; CHECK-LABEL: foobar: -; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CHECK: s_load_dwordx2 s[2:3], s[0:1], 0x9 ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb ; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64 -; CHECK-NEXT: s_mov_b32 s2, -1 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; CHECK-NEXT: 
s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v1, s5 +; CHECK-NEXT: s_mov_b32 s2, -1 +; CHECK-NEXT: v_mov_b32_e32 v1, s3 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc ; CHECK: BB0_1: -; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr4_sgpr5 killed $exec +; CHECK-NEXT: ; kill: def $vgpr0_vgpr1 killed $sgpr2_sgpr3 killed $exec ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK: BB0_2: Index: llvm/test/CodeGen/AMDGPU/trap.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/trap.ll +++ llvm/test/CodeGen/AMDGPU/trap.ll @@ -58,7 +58,7 @@ ; GCN-LABEL: {{^}}hsa_debugtrap: ; HSA-TRAP: enable_trap_handler = 0 ; HSA-TRAP: s_trap 3 -; HSA-TRAP: flat_store_dword v[0:1], v3 +; HSA-TRAP: flat_store_dword v[0:1], v2 ; for llvm.debugtrap in non-hsa path without ABI, generate a warning and a s_endpgm instruction ; NO-HSA-TRAP: enable_trap_handler = 0 Index: llvm/test/CodeGen/AMDGPU/xor.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/xor.ll +++ llvm/test/CodeGen/AMDGPU/xor.ll @@ -206,10 +206,10 @@ ; SI-NOT: xor_b32 ; SI: s_xor_b32 s[[VAL_LO]], s{{[0-9]+}}, 63 ; SI-NOT: xor_b32 -; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s{{[0-9]+}} -; SI-NOT: xor_b32 ; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s{{[0-9]+}} ; SI-NOT: xor_b32 +; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s{{[0-9]+}} +; SI-NOT: xor_b32 ; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define amdgpu_kernel void @scalar_xor_inline_imm_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) { %or = xor i64 %a, 63 Index: llvm/test/CodeGen/AMDGPU/zero_extend.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/zero_extend.ll +++ llvm/test/CodeGen/AMDGPU/zero_extend.ll @@ -36,9 +36,15 @@ } ; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i64: -; GCN: s_mov_b32 s{{[0-9]+}}, 0 -; GCN: v_cmp_eq_u32 -; GCN: v_cndmask_b32 +; SI: v_cmp_eq_u32 +; SI: v_cndmask_b32 +; SI: s_mov_b32 s{{[0-9]+}}, 0 + +; VI: v_cmp_eq_u32 +; VI: s_mov_b32 s{{[0-9]+}}, 0 +; VI: v_cndmask_b32 + + define amdgpu_kernel void @s_cmp_zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) #0 { %cmp = icmp eq i32 %a, %b %ext = zext i1 %cmp to i64 @@ -48,8 +54,11 @@ ; FIXME: Why different commute? 
; GCN-LABEL: {{^}}s_cmp_zext_i1_to_i16 -; GCN: s_load_dword [[A:s[0-9]+]] -; GCN: s_load_dword [[B:s[0-9]+]] +; SI: s_load_dword [[B:s[0-9]+]] +; SI: s_load_dword [[A:s[0-9]+]] +; VI: s_load_dword [[A:s[0-9]+]] +; VI: s_load_dword [[B:s[0-9]+]] + ; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0xffff{{$}} ; GCN-DAG: s_and_b32 [[MASK_A:s[0-9]+]], [[A]], [[MASK]] Index: llvm/test/CodeGen/ARM/misched-fusion-lit.ll =================================================================== --- llvm/test/CodeGen/ARM/misched-fusion-lit.ll +++ llvm/test/CodeGen/ARM/misched-fusion-lit.ll @@ -13,7 +13,7 @@ ; CHECK-LABEL: litp: ; CHECK: movw [[R:r[0-9]+]], :lower16:litp -; CHECKDONT-NEXT: movw [[S:r[0-9]+]], :lower16:g +; CHECKDONT: movw [[S:r[0-9]+]], :lower16:g ; CHECKFUSE-NEXT: movt [[R]], :upper16:litp ; CHECKFUSE-NEXT: movw [[S:r[0-9]+]], :lower16:g ; CHECKFUSE-NEXT: movt [[S]], :upper16:g @@ -30,10 +30,10 @@ ; CHECK-LABEL: liti: ; CHECK: movw [[R:r[0-9]+]], #309 -; CHECKDONT-NEXT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}} -; CHECKFUSE-NEXT: movt [[R]], #1848 +; CHECKDONT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}} +; CHECKFUSE: movt [[R]], #1848 ; CHECKFUSE: movw [[S:r[0-9]+]], :lower16:g -; CHECKFUSE-NEXT: movt [[S]], :upper16:g -; CHECKFUSE-NEXT: movw [[T:r[0-9]+]], #48879 -; CHECKFUSE-NEXT: movt [[T]], #61536 +; CHECKFUSE: movt [[S]], :upper16:g +; CHECKFUSE: movw [[T:r[0-9]+]], #48879 +; CHECKFUSE: movt [[T]], #61536 } Index: llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll =================================================================== --- llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -183,12 +183,12 @@ ; CHECK-NEXT: lfd 31, 272(1) ; CHECK-NEXT: bc 12, 20, .LBB0_13 ; CHECK-NEXT: # %bb.10: # %bb2 -; CHECK-NEXT: fneg 29, 30 -; CHECK-NEXT: fneg 28, 31 +; CHECK-NEXT: fneg 29, 31 +; CHECK-NEXT: fneg 28, 30 ; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: lis 3, 16864 -; CHECK-NEXT: stfd 28, 48(1) -; CHECK-NEXT: stfd 29, 40(1) +; CHECK-NEXT: stfd 29, 48(1) +; CHECK-NEXT: stfd 28, 40(1) ; CHECK-NEXT: stw 29, 84(1) ; CHECK-NEXT: stw 3, 80(1) ; CHECK-NEXT: stw 29, 76(1) @@ -219,7 +219,7 @@ ; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) ; CHECK-NEXT: lis 3, .LCPI0_3@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 29, 28 +; CHECK-NEXT: fadd 2, 28, 29 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_3@l(3) ; CHECK-NEXT: fcmpu 0, 30, 0 Index: llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll =================================================================== --- llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll +++ llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll @@ -10,61 +10,133 @@ define signext i32 @main() { ; CHECK-LABEL: main: -; CHECK: li 3, -32477 -; CHECK: li 6, 234 -; CHECK: sth 3, 46(1) -; CHECK: lis 3, 0 -; CHECK: ori 4, 3, 33059 -; CHECK: sync -; CHECK: .LBB0_1: # %L.entry -; CHECK: lharx 3, 0, 5 -; CHECK: cmpw 4, 3 -; CHECK: bne 0, .LBB0_3 -; CHECK: sthcx. 6, 0, 5 -; CHECK: bne 0, .LBB0_1 -; CHECK: b .LBB0_4 -; CHECK: .LBB0_3: # %L.entry -; CHECK: sthcx. 
3, 0, 5 -; CHECK: .LBB0_4: # %L.entry -; CHECK: cmplwi 3, 33059 -; CHECK: lwsync -; CHECK: lhz 3, 46(1) -; CHECK: cmplwi 3, 234 +; CHECK: # %bb.0: # %L.entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: li 3, -32477 +; CHECK-NEXT: addi 5, 1, 46 +; CHECK-NEXT: li 6, 234 +; CHECK-NEXT: sth 3, 46(1) +; CHECK-NEXT: lis 3, 0 +; CHECK-NEXT: ori 4, 3, 33059 +; CHECK-NEXT: sync +; CHECK-NEXT: .LBB0_1: # %L.entry +; CHECK-NEXT: # +; CHECK-NEXT: lharx 3, 0, 5 +; CHECK-NEXT: cmpw 4, 3 +; CHECK-NEXT: bne 0, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %L.entry +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 6, 0, 5 +; CHECK-NEXT: bne 0, .LBB0_1 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_3: # %L.entry +; CHECK-NEXT: sthcx. 3, 0, 5 +; CHECK-NEXT: .LBB0_4: # %L.entry +; CHECK-NEXT: cmplwi 3, 33059 +; CHECK-NEXT: lwsync +; CHECK-NEXT: bne 0, .LBB0_7 +; CHECK-NEXT: # %bb.5: # %L.B0000 +; CHECK-NEXT: lhz 3, 46(1) +; CHECK-NEXT: cmplwi 3, 234 +; CHECK-NEXT: bne 0, .LBB0_8 +; CHECK-NEXT: # %bb.6: # %L.B0001 +; CHECK-NEXT: addis 3, 2, .Lstr.2@toc@ha +; CHECK-NEXT: addi 3, 3, .Lstr.2@toc@l +; CHECK-NEXT: bl puts +; CHECK-NEXT: nop +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: b .LBB0_10 +; CHECK-NEXT: .LBB0_7: # %L.B0003 +; CHECK-NEXT: addis 3, 2, .Lstr@toc@ha +; CHECK-NEXT: addi 3, 3, .Lstr@toc@l +; CHECK-NEXT: b .LBB0_9 +; CHECK-NEXT: .LBB0_8: # %L.B0005 +; CHECK-NEXT: addis 3, 2, .Lstr.1@toc@ha +; CHECK-NEXT: addi 3, 3, .Lstr.1@toc@l +; CHECK-NEXT: .LBB0_9: # %L.B0003 +; CHECK-NEXT: bl puts +; CHECK-NEXT: nop +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: .LBB0_10: # %L.B0003 +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr ; ; CHECK-P7-LABEL: main: -; CHECK-P7: li 3, -32477 -; CHECK-P7: lis 5, 0 -; CHECK-P7: li 7, 0 -; CHECK-P7: sth 3, 46(1) -; CHECK-P7: li 6, 234 -; CHECK-P7: ori 5, 5, 33059 -; CHECK-P7: rlwinm 3, 4, 3, 27, 27 -; CHECK-P7: ori 7, 7, 65535 -; CHECK-P7: sync -; CHECK-P7: slw 6, 6, 3 -; CHECK-P7: slw 8, 5, 3 -; CHECK-P7: slw 5, 7, 3 -; CHECK-P7: rldicr 4, 4, 0, 61 -; CHECK-P7: and 7, 6, 5 -; CHECK-P7: and 8, 8, 5 -; CHECK-P7: .LBB0_1: # %L.entry -; CHECK-P7: lwarx 9, 0, 4 -; CHECK-P7: and 6, 9, 5 -; CHECK-P7: cmpw 0, 6, 8 -; CHECK-P7: bne 0, .LBB0_3 -; CHECK-P7: andc 9, 9, 5 -; CHECK-P7: or 9, 9, 7 -; CHECK-P7: stwcx. 9, 0, 4 -; CHECK-P7: bne 0, .LBB0_1 -; CHECK-P7: b .LBB0_4 -; CHECK-P7: .LBB0_3: # %L.entry -; CHECK-P7: stwcx. 
9, 0, 4 -; CHECK-P7: .LBB0_4: # %L.entry -; CHECK-P7: srw 3, 6, 3 -; CHECK-P7: lwsync -; CHECK-P7: cmplwi 3, 33059 -; CHECK-P7: lhz 3, 46(1) -; CHECK-P7: cmplwi 3, 234 +; CHECK-P7: # %bb.0: # %L.entry +; CHECK-P7-NEXT: mflr 0 +; CHECK-P7-NEXT: std 0, 16(1) +; CHECK-P7-NEXT: stdu 1, -48(1) +; CHECK-P7-NEXT: .cfi_def_cfa_offset 48 +; CHECK-P7-NEXT: .cfi_offset lr, 16 +; CHECK-P7-NEXT: li 3, -32477 +; CHECK-P7-NEXT: addi 4, 1, 46 +; CHECK-P7-NEXT: lis 5, 0 +; CHECK-P7-NEXT: li 7, 0 +; CHECK-P7-NEXT: sth 3, 46(1) +; CHECK-P7-NEXT: rlwinm 3, 4, 3, 27, 27 +; CHECK-P7-NEXT: li 6, 234 +; CHECK-P7-NEXT: ori 5, 5, 33059 +; CHECK-P7-NEXT: ori 7, 7, 65535 +; CHECK-P7-NEXT: sync +; CHECK-P7-NEXT: slw 6, 6, 3 +; CHECK-P7-NEXT: slw 8, 5, 3 +; CHECK-P7-NEXT: slw 5, 7, 3 +; CHECK-P7-NEXT: rldicr 4, 4, 0, 61 +; CHECK-P7-NEXT: and 6, 6, 5 +; CHECK-P7-NEXT: and 8, 8, 5 +; CHECK-P7-NEXT: .LBB0_1: # %L.entry +; CHECK-P7-NEXT: # +; CHECK-P7-NEXT: lwarx 9, 0, 4 +; CHECK-P7-NEXT: and 7, 9, 5 +; CHECK-P7-NEXT: cmpw 0, 7, 8 +; CHECK-P7-NEXT: bne 0, .LBB0_3 +; CHECK-P7-NEXT: # %bb.2: # %L.entry +; CHECK-P7-NEXT: # +; CHECK-P7-NEXT: andc 9, 9, 5 +; CHECK-P7-NEXT: or 9, 9, 6 +; CHECK-P7-NEXT: stwcx. 9, 0, 4 +; CHECK-P7-NEXT: bne 0, .LBB0_1 +; CHECK-P7-NEXT: b .LBB0_4 +; CHECK-P7-NEXT: .LBB0_3: # %L.entry +; CHECK-P7-NEXT: stwcx. 9, 0, 4 +; CHECK-P7-NEXT: .LBB0_4: # %L.entry +; CHECK-P7-NEXT: srw 3, 7, 3 +; CHECK-P7-NEXT: lwsync +; CHECK-P7-NEXT: cmplwi 3, 33059 +; CHECK-P7-NEXT: bne 0, .LBB0_7 +; CHECK-P7-NEXT: # %bb.5: # %L.B0000 +; CHECK-P7-NEXT: lhz 3, 46(1) +; CHECK-P7-NEXT: cmplwi 3, 234 +; CHECK-P7-NEXT: bne 0, .LBB0_8 +; CHECK-P7-NEXT: # %bb.6: # %L.B0001 +; CHECK-P7-NEXT: addis 3, 2, .Lstr.2@toc@ha +; CHECK-P7-NEXT: addi 3, 3, .Lstr.2@toc@l +; CHECK-P7-NEXT: bl puts +; CHECK-P7-NEXT: nop +; CHECK-P7-NEXT: li 3, 0 +; CHECK-P7-NEXT: b .LBB0_10 +; CHECK-P7-NEXT: .LBB0_7: # %L.B0003 +; CHECK-P7-NEXT: addis 3, 2, .Lstr@toc@ha +; CHECK-P7-NEXT: addi 3, 3, .Lstr@toc@l +; CHECK-P7-NEXT: b .LBB0_9 +; CHECK-P7-NEXT: .LBB0_8: # %L.B0005 +; CHECK-P7-NEXT: addis 3, 2, .Lstr.1@toc@ha +; CHECK-P7-NEXT: addi 3, 3, .Lstr.1@toc@l +; CHECK-P7-NEXT: .LBB0_9: # %L.B0003 +; CHECK-P7-NEXT: bl puts +; CHECK-P7-NEXT: nop +; CHECK-P7-NEXT: li 3, 1 +; CHECK-P7-NEXT: .LBB0_10: # %L.B0003 +; CHECK-P7-NEXT: addi 1, 1, 48 +; CHECK-P7-NEXT: ld 0, 16(1) +; CHECK-P7-NEXT: mtlr 0 +; CHECK-P7-NEXT: blr L.entry: %value.addr = alloca i16, align 2 store i16 -32477, i16* %value.addr, align 2 Index: llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir =================================================================== --- llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -6136,7 +6136,7 @@ %3 = RLDICR %2, 4, 59 STXVX %1, %0, killed %3 :: (store 16 into %ir.arrayidx, !tbaa !3) ; CHECK: STXV %1, 16, killed %3 - ; CHECK-LATE: stxv 34, 16(4) + ; CHECK-LATE: stxv 34, 16(3) BLR8 implicit $lr8, implicit $rm ... 
Index: llvm/test/CodeGen/PowerPC/crbit-asm.ll =================================================================== --- llvm/test/CodeGen/PowerPC/crbit-asm.ll +++ llvm/test/CodeGen/PowerPC/crbit-asm.ll @@ -22,8 +22,9 @@ ; CHECK-DAG: li [[REG4:[0-9]+]], 1 ; CHECK: isel 3, [[REG4]], [[REG1]], [[REG3]] ; CHECK-NO-ISEL-LABEL: @testi1 -; CHECK-NO-ISEL: bclr 12, 20, 0 -; CHECK-NO-ISEL: ori 3, 5, 0 +; CHECK-NO-ISEL: bc 12, 20 +; CHECK-NO-ISEL: blr +; CHECK-NO-ISEL: addi 3, 4, 0 ; CHECK-NO-ISEL-NEXT: blr ; CHECK: blr } Index: llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -84,13 +84,13 @@ ; PPC64-DAG: stfd 1, [[OFFSET:-?[0-9]+]](1) ; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399 ; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019 -; PPC64-NOT: BARRIER ; PPC64-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48 ; PPC64-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52 ; PPC64-DAG: ld [[X_HI:[0-9]+]], [[OFFSET]](1) ; PPC64-DAG: rldicr [[NEW_HI_TMP:[0-9]+]], [[X_HI]], 0, 0 ; PPC64-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]] ; PPC64-DAG: xor 4, [[NEW_HI_TMP]], [[CST_LO]] +; PPC64-NOT: BARRIER ; PPC64: blr ; PPC64-P8-LABEL: test_copysign: Index: llvm/test/CodeGen/PowerPC/i1-ext-fold.ll =================================================================== --- llvm/test/CodeGen/PowerPC/i1-ext-fold.ll +++ llvm/test/CodeGen/PowerPC/i1-ext-fold.ll @@ -19,8 +19,9 @@ ; CHECK: isel 3, [[REG2]], [[REG1]], ; CHECK: blr -; CHECK-NO-ISEL: bclr 12, 0, 0 -; CHECK-NO-ISEL: ori 3, 5, 0 +; CHECK-NO-ISEL: bc 12, 0, +; CHECK-NO-ISEL: blr +; CHECK-NO-ISEL: addi 3, 4, 0 ; CHECK-NO-ISEL-NEXT: blr } @@ -41,8 +42,9 @@ ; CHECK: isel 3, [[REG2]], [[REG1]], ; CHECK: blr -; CHECK-NO-ISEL: bclr 12, 0, 0 -; CHECK-NO-ISEL: ori 3, 5, 0 +; CHECK-NO-ISEL: bc 12, 0 +; CHECK-NO-ISEL: blr +; CHECK-NO-ISEL: addi 3, 4, 0 ; CHECK-NO-ISEL-NEXT: blr } @@ -62,7 +64,6 @@ ; CHECK: blr ; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 3, 5, 0 ; CHECK-NO-ISEL-NEXT: blr ; CHECK-NO-ISEL-NEXT: [[TRUE]] ; CHECK-NO-ISEL-NEXT: addi 3, 0, 0 Index: llvm/test/CodeGen/PowerPC/indirectbr.ll =================================================================== --- llvm/test/CodeGen/PowerPC/indirectbr.ll +++ llvm/test/CodeGen/PowerPC/indirectbr.ll @@ -74,10 +74,10 @@ ; PIC: lwz r[[R0:[0-9]+]], .LC0-.LTOC(r30) ; PIC-NEXT: lwz r[[R1:[0-9]+]], .LC2-.LTOC(r30) ; PIC-NEXT: stw r[[R1]], 0(r[[R0]]) -; STATIC: li r[[R0:[0-9]+]], .Ltmp0@l -; STATIC-NEXT: lis r[[R1:[0-9]+]], nextaddr@ha -; STATIC-NEXT: addis r[[R0]], r[[R0]], .Ltmp0@ha -; STATIC-NEXT: stw r[[R0]], nextaddr@l(r[[R1]] +; STATIC-DAG: li r[[R0:[0-9]+]], .Ltmp0@l +; STATIC-DAG: lis r[[R1:[0-9]+]], nextaddr@ha +; STATIC-DAG: addis r[[R0]], r[[R0]], .Ltmp0@ha +; STATIC: stw r[[R0]], nextaddr@l(r[[R1]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } Index: llvm/test/CodeGen/PowerPC/licm-remat.ll =================================================================== --- llvm/test/CodeGen/PowerPC/licm-remat.ll +++ llvm/test/CodeGen/PowerPC/licm-remat.ll @@ -20,11 +20,10 @@ define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompressor"* %this, %"class.snappy::SnappyIOVecWriter"* %writer) { ; CHECK-LABEL: ZN6snappyDecompressor_: ; CHECK: # %bb.0: # %entry -; CHECK: addis 3, 2, _ZN6snappy8internalL8wordmaskE@toc@ha +; CHECK-DAG: addis 3, 2, _ZN6snappy8internalL8wordmaskE@toc@ha ; CHECK-DAG: 
addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l -; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha -; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l -; CHECK: b .LBB0_2 +; CHECK-DAG: addis [[R:[0-9]+]], 2, _ZN6snappy8internalL10char_tableE@toc@ha +; CHECK-DAG: addi 24, [[R]], _ZN6snappy8internalL10char_tableE@toc@l ; CHECK: .LBB0_2: # %for.cond ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha Index: llvm/test/CodeGen/PowerPC/pr33093.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pr33093.ll +++ llvm/test/CodeGen/PowerPC/pr33093.ll @@ -70,50 +70,50 @@ ; CHECK-LABEL: ReverseBits64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lis 4, -21846 -; CHECK-NEXT: lis 5, 21845 -; CHECK-NEXT: lis 7, -13108 +; CHECK-NEXT: lis 6, 21845 ; CHECK-NEXT: lis 8, 13107 +; CHECK-NEXT: lis 7, -13108 ; CHECK-NEXT: ori 4, 4, 43690 -; CHECK-NEXT: ori 5, 5, 21845 -; CHECK-NEXT: ori 7, 7, 52428 +; CHECK-NEXT: ori 6, 6, 21845 ; CHECK-NEXT: ori 8, 8, 13107 +; CHECK-NEXT: ori 7, 7, 52428 ; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: sldi 5, 5, 32 +; CHECK-NEXT: sldi 6, 6, 32 ; CHECK-NEXT: oris 4, 4, 43690 -; CHECK-NEXT: oris 5, 5, 21845 -; CHECK-NEXT: sldi 6, 3, 1 +; CHECK-NEXT: oris 6, 6, 21845 +; CHECK-NEXT: sldi 5, 3, 1 ; CHECK-NEXT: rldicl 3, 3, 63, 1 ; CHECK-NEXT: ori 4, 4, 43690 -; CHECK-NEXT: ori 5, 5, 21845 +; CHECK-NEXT: ori 6, 6, 21845 +; CHECK-NEXT: and 4, 5, 4 +; CHECK-NEXT: and 3, 3, 6 +; CHECK-NEXT: sldi 5, 8, 32 ; CHECK-NEXT: sldi 7, 7, 32 -; CHECK-NEXT: sldi 8, 8, 32 -; CHECK-NEXT: and 4, 6, 4 -; CHECK-NEXT: and 3, 3, 5 -; CHECK-NEXT: lis 5, -3856 -; CHECK-NEXT: oris 6, 7, 52428 -; CHECK-NEXT: oris 7, 8, 13107 ; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: lis 4, 3855 -; CHECK-NEXT: ori 5, 5, 61680 -; CHECK-NEXT: ori 6, 6, 52428 -; CHECK-NEXT: ori 7, 7, 13107 -; CHECK-NEXT: ori 4, 4, 3855 -; CHECK-NEXT: sldi 8, 3, 2 +; CHECK-NEXT: lis 8, 3855 +; CHECK-NEXT: oris 5, 5, 13107 +; CHECK-NEXT: oris 4, 7, 52428 +; CHECK-NEXT: lis 7, -3856 +; CHECK-NEXT: sldi 6, 3, 2 +; CHECK-NEXT: ori 5, 5, 13107 +; CHECK-NEXT: ori 7, 7, 61680 +; CHECK-NEXT: ori 4, 4, 52428 ; CHECK-NEXT: rldicl 3, 3, 62, 2 -; CHECK-NEXT: and 6, 8, 6 -; CHECK-NEXT: and 3, 3, 7 +; CHECK-NEXT: and 4, 6, 4 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: ori 5, 8, 3855 +; CHECK-NEXT: sldi 6, 7, 32 ; CHECK-NEXT: sldi 5, 5, 32 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: or 3, 3, 6 -; CHECK-NEXT: oris 5, 5, 61680 -; CHECK-NEXT: oris 4, 4, 3855 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: oris 4, 6, 61680 +; CHECK-NEXT: oris 5, 5, 3855 ; CHECK-NEXT: sldi 6, 3, 4 -; CHECK-NEXT: ori 5, 5, 61680 -; CHECK-NEXT: ori 4, 4, 3855 +; CHECK-NEXT: ori 4, 4, 61680 +; CHECK-NEXT: ori 5, 5, 3855 ; CHECK-NEXT: rldicl 3, 3, 60, 4 -; CHECK-NEXT: and 5, 6, 5 -; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: or 3, 3, 5 +; CHECK-NEXT: and 4, 6, 4 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: rldicl 4, 3, 32, 32 ; CHECK-NEXT: rlwinm 5, 3, 24, 0, 31 ; CHECK-NEXT: rlwinm 6, 4, 24, 0, 31 Index: llvm/test/CodeGen/PowerPC/pr35688.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pr35688.ll +++ llvm/test/CodeGen/PowerPC/pr35688.ll @@ -4,16 +4,21 @@ ; Function Attrs: nounwind define void @ec_GFp_nistp256_points_mul() { ; CHECK-LABEL: ec_GFp_nistp256_points_mul: -; CHECK: ld 5, 0(3) -; CHECK: li 3, 127 -; CHECK: li 4, 0 -; CHECK: subfic 6, 5, 0 -; CHECK: subfze 
6, 4 -; CHECK: sradi 7, 6, 63 -; CHECK: srad 6, 6, 3 -; CHECK: subfc 5, 5, 7 -; CHECK: subfe 5, 4, 6 -; CHECK: sradi 5, 5, 63 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld 5, 0(3) +; CHECK-NEXT: li 3, 127 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_1: # %fe_cmovznz.exit.i534.i.15 +; CHECK-NEXT: # +; CHECK-NEXT: subfic 6, 5, 0 +; CHECK-NEXT: subfze 6, 4 +; CHECK-NEXT: sradi 7, 6, 63 +; CHECK-NEXT: srad 6, 6, 3 +; CHECK-NEXT: subfc 5, 5, 7 +; CHECK-NEXT: subfe 5, 4, 6 +; CHECK-NEXT: sradi 5, 5, 63 +; CHECK-NEXT: b .LBB0_1 entry: br label %fe_cmovznz.exit.i534.i.15 Index: llvm/test/CodeGen/PowerPC/setcc-logic.ll =================================================================== --- llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -30,9 +30,9 @@ define zeroext i1 @all_bits_set(i32 %P, i32 %Q) { ; CHECK-LABEL: all_bits_set: ; CHECK: # %bb.0: -; CHECK-NEXT: li 5, -1 ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: xor 3, 3, 5 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: cntlzw 3, 3 ; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr @@ -83,9 +83,9 @@ define zeroext i1 @any_bits_clear(i32 %P, i32 %Q) { ; CHECK-LABEL: any_bits_clear: ; CHECK: # %bb.0: -; CHECK-NEXT: li 5, -1 ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: xor 3, 3, 5 +; CHECK-NEXT: li 4, -1 +; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: cntlzw 3, 3 ; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: xori 3, 3, 1 @@ -419,9 +419,9 @@ ; CHECK-LABEL: ne_neg1_and_ne_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: li 4, 1 ; CHECK-NEXT: subfic 3, 3, 1 -; CHECK-NEXT: subfe 3, 4, 4 +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: subfe 3, 3, 3 ; CHECK-NEXT: neg 3, 3 ; CHECK-NEXT: blr %cmp1 = icmp ne i64 %x, -1 Index: llvm/test/CodeGen/PowerPC/signbit-shift.ll =================================================================== --- llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -45,10 +45,10 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_tval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 41 ; CHECK-NEXT: cmpwi 0, 3, -1 -; CHECK-NEXT: li 3, 42 -; CHECK-NEXT: isel 3, 3, 4, 1 +; CHECK-NEXT: li 3, 41 +; CHECK-NEXT: li 4, 42 +; CHECK-NEXT: isel 3, 4, 3, 1 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 42, i32 41 @@ -98,10 +98,10 @@ define i32 @sel_ifpos_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_fval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 42 ; CHECK-NEXT: cmpwi 0, 3, -1 -; CHECK-NEXT: li 3, 41 -; CHECK-NEXT: isel 3, 3, 4, 1 +; CHECK-NEXT: li 3, 42 +; CHECK-NEXT: li 4, 41 +; CHECK-NEXT: isel 3, 4, 3, 1 ; CHECK-NEXT: blr %c = icmp sgt i32 %x, -1 %r = select i1 %c, i32 41, i32 42 @@ -135,10 +135,10 @@ define i32 @sel_ifneg_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifneg_tval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 41 ; CHECK-NEXT: cmpwi 0, 3, 0 -; CHECK-NEXT: li 3, 42 -; CHECK-NEXT: isel 3, 3, 4, 0 +; CHECK-NEXT: li 3, 41 +; CHECK-NEXT: li 4, 42 +; CHECK-NEXT: isel 3, 4, 3, 0 ; CHECK-NEXT: blr %c = icmp slt i32 %x, 0 %r = select i1 %c, i32 42, i32 41 @@ -170,10 +170,10 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifneg_fval_bigger: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 42 ; CHECK-NEXT: cmpwi 0, 3, 0 -; CHECK-NEXT: li 3, 41 -; CHECK-NEXT: isel 3, 3, 4, 0 +; CHECK-NEXT: li 3, 42 +; CHECK-NEXT: li 4, 41 +; CHECK-NEXT: isel 3, 4, 3, 0 ; CHECK-NEXT: blr %c = icmp slt i32 %x, 0 %r = select i1 %c, i32 41, i32 42 Index: llvm/test/CodeGen/PowerPC/subreg-postra-2.ll 
=================================================================== --- llvm/test/CodeGen/PowerPC/subreg-postra-2.ll +++ llvm/test/CodeGen/PowerPC/subreg-postra-2.ll @@ -38,10 +38,10 @@ ; CHECK: stdcx. ; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]] ; CHECK-NO-ISEL: bc 12, 20, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 7, 8, 0 +; CHECK-NO-ISEL: ori 4, 5, 0 ; CHECK-NO-ISEL-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] ; CHECK-NO-ISEL: [[TRUE]] -; CHECK-NO-ISEL: addi 7, 3, 0 +; CHECK-NO-ISEL: addi 4, 3, 0 if.then420: ; preds = %while.end418 unreachable Index: llvm/test/CodeGen/PowerPC/testBitReverse.ll =================================================================== --- llvm/test/CodeGen/PowerPC/testBitReverse.ll +++ llvm/test/CodeGen/PowerPC/testBitReverse.ll @@ -46,50 +46,50 @@ ; CHECK-LABEL: testBitReverseIntrinsicI64: ; CHECK: # %bb.0: ; CHECK-NEXT: lis 4, -21846 -; CHECK-NEXT: lis 5, 21845 -; CHECK-NEXT: lis 7, -13108 +; CHECK-NEXT: lis 6, 21845 ; CHECK-NEXT: lis 8, 13107 +; CHECK-NEXT: lis 7, -13108 ; CHECK-NEXT: ori 4, 4, 43690 -; CHECK-NEXT: ori 5, 5, 21845 -; CHECK-NEXT: ori 7, 7, 52428 +; CHECK-NEXT: ori 6, 6, 21845 ; CHECK-NEXT: ori 8, 8, 13107 +; CHECK-NEXT: ori 7, 7, 52428 ; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: sldi 5, 5, 32 +; CHECK-NEXT: sldi 6, 6, 32 ; CHECK-NEXT: oris 4, 4, 43690 -; CHECK-NEXT: oris 5, 5, 21845 -; CHECK-NEXT: sldi 6, 3, 1 +; CHECK-NEXT: oris 6, 6, 21845 +; CHECK-NEXT: sldi 5, 3, 1 ; CHECK-NEXT: rldicl 3, 3, 63, 1 ; CHECK-NEXT: ori 4, 4, 43690 -; CHECK-NEXT: ori 5, 5, 21845 +; CHECK-NEXT: ori 6, 6, 21845 +; CHECK-NEXT: and 4, 5, 4 +; CHECK-NEXT: and 3, 3, 6 +; CHECK-NEXT: sldi 5, 8, 32 ; CHECK-NEXT: sldi 7, 7, 32 -; CHECK-NEXT: sldi 8, 8, 32 -; CHECK-NEXT: and 4, 6, 4 -; CHECK-NEXT: and 3, 3, 5 -; CHECK-NEXT: lis 5, -3856 -; CHECK-NEXT: oris 6, 7, 52428 -; CHECK-NEXT: oris 7, 8, 13107 ; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: lis 4, 3855 -; CHECK-NEXT: ori 5, 5, 61680 -; CHECK-NEXT: ori 6, 6, 52428 -; CHECK-NEXT: ori 7, 7, 13107 -; CHECK-NEXT: ori 4, 4, 3855 -; CHECK-NEXT: sldi 8, 3, 2 +; CHECK-NEXT: lis 8, 3855 +; CHECK-NEXT: oris 5, 5, 13107 +; CHECK-NEXT: oris 4, 7, 52428 +; CHECK-NEXT: lis 7, -3856 +; CHECK-NEXT: sldi 6, 3, 2 +; CHECK-NEXT: ori 5, 5, 13107 +; CHECK-NEXT: ori 7, 7, 61680 +; CHECK-NEXT: ori 4, 4, 52428 ; CHECK-NEXT: rldicl 3, 3, 62, 2 -; CHECK-NEXT: and 6, 8, 6 -; CHECK-NEXT: and 3, 3, 7 +; CHECK-NEXT: and 4, 6, 4 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: ori 5, 8, 3855 +; CHECK-NEXT: sldi 6, 7, 32 ; CHECK-NEXT: sldi 5, 5, 32 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: or 3, 3, 6 -; CHECK-NEXT: oris 5, 5, 61680 -; CHECK-NEXT: oris 4, 4, 3855 +; CHECK-NEXT: or 3, 3, 4 +; CHECK-NEXT: oris 4, 6, 61680 +; CHECK-NEXT: oris 5, 5, 3855 ; CHECK-NEXT: sldi 6, 3, 4 -; CHECK-NEXT: ori 5, 5, 61680 -; CHECK-NEXT: ori 4, 4, 3855 +; CHECK-NEXT: ori 4, 4, 61680 +; CHECK-NEXT: ori 5, 5, 3855 ; CHECK-NEXT: rldicl 3, 3, 60, 4 -; CHECK-NEXT: and 5, 6, 5 -; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: or 3, 3, 5 +; CHECK-NEXT: and 4, 6, 4 +; CHECK-NEXT: and 3, 3, 5 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: rldicl 4, 3, 32, 32 ; CHECK-NEXT: rlwinm 5, 3, 24, 0, 31 ; CHECK-NEXT: rlwinm 6, 4, 24, 0, 31 Index: llvm/test/CodeGen/SystemZ/DAGCombiner_isAlias.ll =================================================================== --- llvm/test/CodeGen/SystemZ/DAGCombiner_isAlias.ll +++ llvm/test/CodeGen/SystemZ/DAGCombiner_isAlias.ll @@ -10,10 +10,10 @@ ; %.b = load i1, i1* @g_2, align 4 ; CHECK: # %bb.6: # %crc32_gentab.exit -; CHECK: larl %r2, g_2 -; CHECK-NEXT: llc %r3, 
0(%r2) -; CHECK-NOT: %r2 -; CHECK: llc %r1, 0(%r2) +; CHECK: larl %r1, g_2 +; CHECK: llc %r{{[0-9]}}, 0(%r1) +; CHECK-NOT: %r1{{[^0-9]}} +; CHECK: llc %r{{[0-9]}}, 0(%r1) @g_2 = external hidden unnamed_addr global i1, align 4 @.str.1 = external hidden unnamed_addr constant [4 x i8], align 2 Index: llvm/test/CodeGen/SystemZ/pr36164.ll =================================================================== --- llvm/test/CodeGen/SystemZ/pr36164.ll +++ llvm/test/CodeGen/SystemZ/pr36164.ll @@ -46,9 +46,9 @@ ; CHECK-NEXT: lrl %r13, g_832 ; CHECK-NEXT: strl %r0, g_69 ; CHECK-NEXT: lrl %r13, g_832 -; CHECK-DAG: lghi %r13, 24 -; CHECK-DAG: strl %r2, g_69 -; CHECK-DAG: ag %r13, 0(%r1) +; CHECK-NEXT: lghi %r13, 24 +; CHECK-NEXT: ag %r13, 0(%r1) +; CHECK-NEXT: strl %r2, g_69 ; CHECK-NEXT: lrl %r12, g_832 ; CHECK-NEXT: strl %r3, g_69 ; CHECK-NEXT: lrl %r12, g_832 Index: llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll =================================================================== --- llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -38,9 +38,9 @@ ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq _xxGetOffsetForCode +; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: callq _xxCalculateMidType ; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: jne LBB0_1 Index: llvm/test/CodeGen/X86/GlobalISel/add-ext.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/add-ext.ll +++ llvm/test/CodeGen/X86/GlobalISel/add-ext.ll @@ -89,11 +89,11 @@ define i16* @gep16(i32 %i, i16* %x) { ; CHECK-LABEL: gep16: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $2, %rax ; CHECK-NEXT: addl $-5, %edi -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%rcx), %rax +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: movq $2, %rcx +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, -5 @@ -105,11 +105,11 @@ define i32* @gep32(i32 %i, i32* %x) { ; CHECK-LABEL: gep32: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $4, %rax ; CHECK-NEXT: addl $5, %edi -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%rcx), %rax +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: movq $4, %rcx +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -121,11 +121,11 @@ define i64* @gep64(i32 %i, i64* %x) { ; CHECK-LABEL: gep64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $8, %rax ; CHECK-NEXT: addl $-5, %edi -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%rcx), %rax +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: movq $8, %rcx +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, -5 @@ -139,11 +139,11 @@ define i128* @gep128(i32 %i, i128* %x) { ; CHECK-LABEL: gep128: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $16, %rax ; CHECK-NEXT: addl $5, %edi -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rsi,%rcx), %rax +; CHECK-NEXT: movslq %edi, %rax +; CHECK-NEXT: movq $16, %rcx +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -159,21 +159,22 @@ define void @PR20134(i32* %a, i32 %i) { ; CHECK-LABEL: PR20134: ; CHECK: # %bb.0: -; CHECK: movq $4, %rax -; 
CHECK-NEXT: leal 1(%rsi), %ecx -; CHECK-NEXT: movslq %ecx, %rcx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rdi,%rcx), %rcx -; CHECK-NEXT: leal 2(%rsi), %edx -; CHECK-NEXT: movslq %edx, %rdx -; CHECK-NEXT: imulq %rax, %rdx -; CHECK-NEXT: leaq (%rdi,%rdx), %rdx -; CHECK-NEXT: movl (%rdx), %edx -; CHECK-NEXT: addl (%rcx), %edx -; CHECK-NEXT: movslq %esi, %rcx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rdi,%rcx), %rax -; CHECK-NEXT: movl %edx, (%rax) +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: leal 1(%rsi), %eax +; CHECK-NEXT: cltq +; CHECK-NEXT: movq $4, %rcx +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rdi,%rax), %rax +; CHECK-NEXT: leal 2(%rsi), %edx +; CHECK-NEXT: movslq %edx, %rdx +; CHECK-NEXT: imulq %rcx, %rdx +; CHECK-NEXT: leaq (%rdi,%rdx), %rdx +; CHECK-NEXT: movl (%rdx), %edx +; CHECK-NEXT: addl (%rax), %edx +; CHECK-NEXT: movslq %esi, %rax +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rdi,%rax), %rax +; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq %add1 = add nsw i32 %i, 1 @@ -195,19 +196,21 @@ ; The same as @PR20134 but sign extension is replaced with zero extension define void @PR20134_zext(i32* %a, i32 %i) { -; CHECK: # %bb.0: -; CHECK: movq $4, %rax -; CHECK-NEXT: leal 1(%rsi), %ecx -; CHECK-NEXT: imulq %rax, %rcx -; CHECK-NEXT: leaq (%rdi,%rcx), %rcx -; CHECK-NEXT: leal 2(%rsi), %edx -; CHECK-NEXT: imulq %rax, %rdx -; CHECK-NEXT: leaq (%rdi,%rdx), %rdx -; CHECK-NEXT: movl (%rdx), %edx -; CHECK-NEXT: addl (%rcx), %edx -; CHECK-NEXT: imulq %rax, %rsi -; CHECK-NEXT: leaq (%rdi,%rsi), %rax -; CHECK-NEXT: movl %edx, (%rax) +; CHECK-LABEL: PR20134_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: leal 1(%rsi), %eax +; CHECK-NEXT: movq $4, %rcx +; CHECK-NEXT: imulq %rcx, %rax +; CHECK-NEXT: leaq (%rdi,%rax), %rax +; CHECK-NEXT: leal 2(%rsi), %edx +; CHECK-NEXT: imulq %rcx, %rdx +; CHECK-NEXT: leaq (%rdi,%rdx), %rdx +; CHECK-NEXT: movl (%rdx), %edx +; CHECK-NEXT: addl (%rax), %edx +; CHECK-NEXT: imulq %rcx, %rsi +; CHECK-NEXT: leaq (%rdi,%rsi), %rax +; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq %add1 = add nuw i32 %i, 1 Index: llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll +++ llvm/test/CodeGen/X86/GlobalISel/ashr-scalar.ll @@ -178,9 +178,9 @@ ; X64-LABEL: test_ashr_i1_imm1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movb $-1, %cl ; X64-NEXT: shlb $7, %al ; X64-NEXT: sarb $7, %al +; X64-NEXT: movb $-1, %cl ; X64-NEXT: andb $1, %cl ; X64-NEXT: sarb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax Index: llvm/test/CodeGen/X86/GlobalISel/callingconv.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/callingconv.ll +++ llvm/test/CodeGen/X86/GlobalISel/callingconv.ll @@ -3,15 +3,10 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -verify-machineinstrs < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 define i32 @test_ret_i32() { -; X32-LABEL: test_ret_i32: -; X32: # %bb.0: -; X32-NEXT: movl $20, %eax -; X32-NEXT: retl -; -; X64-LABEL: test_ret_i32: -; X64: # %bb.0: -; X64-NEXT: movl $20, %eax -; X64-NEXT: retq +; ALL-LABEL: test_ret_i32: +; ALL: # %bb.0: +; ALL-NEXT: movl $20, %eax +; ALL-NEXT: ret{{[l|q]}} ret i32 20 } @@ -99,15 +94,10 @@ } define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { -; X32-LABEL: test_v4i32_args: -; X32: # %bb.0: -; 
X32-NEXT: movaps %xmm1, %xmm0 -; X32-NEXT: retl -; -; X64-LABEL: test_v4i32_args: -; X64: # %bb.0: -; X64-NEXT: movaps %xmm1, %xmm0 -; X64-NEXT: retq +; ALL-LABEL: test_v4i32_args: +; ALL: # %bb.0: +; ALL-NEXT: movaps %xmm1, %xmm0 +; ALL-NEXT: ret{{[l|q]}} ret <4 x i32> %arg2 } @@ -258,12 +248,12 @@ ; X32-NEXT: subl $44, %esp ; X32-NEXT: .cfi_def_cfa_offset 48 ; X32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill -; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill +; X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32-NEXT: movdqa %xmm2, %xmm0 ; X32-NEXT: movdqu {{[0-9]+}}(%esp), %xmm1 ; X32-NEXT: calll split_return_callee ; X32-NEXT: paddd (%esp), %xmm0 # 16-byte Folded Reload -; X32-NEXT: paddd {{[0-9]+}}(%esp), %xmm1 # 16-byte Folded Reload +; X32-NEXT: paddd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; X32-NEXT: addl $44, %esp ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl @@ -273,12 +263,12 @@ ; X64-NEXT: subq $40, %rsp ; X64-NEXT: .cfi_def_cfa_offset 48 ; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; X64-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; X64-NEXT: movdqa %xmm2, %xmm0 ; X64-NEXT: movdqa %xmm3, %xmm1 ; X64-NEXT: callq split_return_callee ; X64-NEXT: paddd (%rsp), %xmm0 # 16-byte Folded Reload -; X64-NEXT: paddd {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; X64-NEXT: paddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; X64-NEXT: addq $40, %rsp ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq Index: llvm/test/CodeGen/X86/GlobalISel/gep.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/gep.ll +++ llvm/test/CodeGen/X86/GlobalISel/gep.ll @@ -6,13 +6,13 @@ ; X64_GISEL-LABEL: test_gep_i8: ; X64_GISEL: # %bb.0: ; X64_GISEL-NEXT: # kill: def $esi killed $esi def $rsi -; X64_GISEL-NEXT: movq $4, %rax ; X64_GISEL-NEXT: movq $56, %rcx ; X64_GISEL-NEXT: # kill: def $cl killed $rcx ; X64_GISEL-NEXT: shlq %cl, %rsi ; X64_GISEL-NEXT: movq $56, %rcx ; X64_GISEL-NEXT: # kill: def $cl killed $rcx ; X64_GISEL-NEXT: sarq %cl, %rsi +; X64_GISEL-NEXT: movq $4, %rax ; X64_GISEL-NEXT: imulq %rax, %rsi ; X64_GISEL-NEXT: leaq (%rdi,%rsi), %rax ; X64_GISEL-NEXT: retq @@ -46,13 +46,13 @@ ; X64_GISEL-LABEL: test_gep_i16: ; X64_GISEL: # %bb.0: ; X64_GISEL-NEXT: # kill: def $esi killed $esi def $rsi -; X64_GISEL-NEXT: movq $4, %rax ; X64_GISEL-NEXT: movq $48, %rcx ; X64_GISEL-NEXT: # kill: def $cl killed $rcx ; X64_GISEL-NEXT: shlq %cl, %rsi ; X64_GISEL-NEXT: movq $48, %rcx ; X64_GISEL-NEXT: # kill: def $cl killed $rcx ; X64_GISEL-NEXT: sarq %cl, %rsi +; X64_GISEL-NEXT: movq $4, %rax ; X64_GISEL-NEXT: imulq %rax, %rsi ; X64_GISEL-NEXT: leaq (%rdi,%rsi), %rax ; X64_GISEL-NEXT: retq @@ -85,10 +85,10 @@ define i32* @test_gep_i32(i32 *%arr, i32 %ind) { ; X64_GISEL-LABEL: test_gep_i32: ; X64_GISEL: # %bb.0: -; X64_GISEL-NEXT: movq $4, %rax -; X64_GISEL-NEXT: movslq %esi, %rcx -; X64_GISEL-NEXT: imulq %rax, %rcx -; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax +; X64_GISEL-NEXT: movslq %esi, %rax +; X64_GISEL-NEXT: movq $4, %rcx +; X64_GISEL-NEXT: imulq %rcx, %rax +; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i32: Index: llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll +++ llvm/test/CodeGen/X86/GlobalISel/lshr-scalar.ll @@ -177,8 +177,8 @@ ; X64-LABEL: 
test_lshr_i1_imm1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movb $-1, %cl ; X64-NEXT: andb $1, %al +; X64-NEXT: movb $-1, %cl ; X64-NEXT: andb $1, %cl ; X64-NEXT: shrb %cl, %al ; X64-NEXT: # kill: def $al killed $al killed $eax Index: llvm/test/CodeGen/X86/abi-isel.ll =================================================================== --- llvm/test/CodeGen/X86/abi-isel.ll +++ llvm/test/CodeGen/X86/abi-isel.ll @@ -35,8 +35,8 @@ define void @foo00() nounwind { ; LINUX-64-STATIC-LABEL: foo00: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl src(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, dst(%rip) +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: foo00: @@ -53,9 +53,9 @@ ; ; LINUX-64-PIC-LABEL: foo00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax), %eax -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -86,25 +86,25 @@ ; ; DARWIN-64-STATIC-LABEL: foo00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax), %eax -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -118,8 +118,8 @@ define void @fxo00() nounwind { ; LINUX-64-STATIC-LABEL: fxo00: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl xsrc(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, xdst(%rip) +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: fxo00: @@ -136,9 +136,9 @@ ; ; LINUX-64-PIC-LABEL: fxo00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax), %eax -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -169,25 +169,25 @@ ; ; DARWIN-64-STATIC-LABEL: fxo00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: 
movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: fxo00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: fxo00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax), %eax -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -201,7 +201,7 @@ define void @foo01() nounwind { ; LINUX-64-STATIC-LABEL: foo01: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $dst, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq $dst, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: foo01: @@ -216,8 +216,8 @@ ; ; LINUX-64-PIC-LABEL: foo01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -245,22 +245,22 @@ ; ; DARWIN-64-STATIC-LABEL: foo01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -272,7 +272,7 @@ define void @fxo01() nounwind { ; LINUX-64-STATIC-LABEL: fxo01: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $xdst, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq $xdst, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: fxo01: @@ -287,8 +287,8 @@ ; ; LINUX-64-PIC-LABEL: fxo01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -316,22 +316,22 @@ ; ; DARWIN-64-STATIC-LABEL: fxo01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; 
DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: fxo01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: fxo01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -343,8 +343,8 @@ define void @foo02() nounwind { ; LINUX-64-STATIC-LABEL: foo02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl src(%rip), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -364,9 +364,9 @@ ; ; LINUX-64-PIC-LABEL: foo02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq @@ -401,27 +401,27 @@ ; ; DARWIN-64-STATIC-LABEL: foo02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -436,8 +436,8 @@ define void @fxo02() nounwind { ; LINUX-64-STATIC-LABEL: fxo02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl xsrc(%rip), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -457,9 +457,9 @@ ; ; LINUX-64-PIC-LABEL: fxo02: ; 
LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq @@ -494,27 +494,27 @@ ; ; DARWIN-64-STATIC-LABEL: fxo02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: fxo02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: fxo02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -529,8 +529,8 @@ define void @foo03() nounwind { ; LINUX-64-STATIC-LABEL: foo03: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl dsrc(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, ddst(%rip) +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: foo03: @@ -547,9 +547,9 @@ ; ; LINUX-64-PIC-LABEL: foo03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax), %eax -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -576,20 +576,20 @@ ; ; DARWIN-64-STATIC-LABEL: foo03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _dsrc(%rip), %eax -; DARWIN-64-STATIC-NEXT: movl %eax, _ddst(%rip) +; DARWIN-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movl %eax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _dsrc(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst(%rip) +; DARWIN-64-DYNAMIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movl %eax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _dsrc(%rip), %eax -; DARWIN-64-PIC-NEXT: movl %eax, _ddst(%rip) +; DARWIN-64-PIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movl %eax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -601,7 +601,7 @@ define void @foo04() nounwind { ; LINUX-64-STATIC-LABEL: foo04: ; 
LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $ddst, dptr(%rip) +; LINUX-64-STATIC-NEXT: movq $ddst, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: foo04: @@ -616,8 +616,8 @@ ; ; LINUX-64-PIC-LABEL: foo04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -642,20 +642,20 @@ ; ; DARWIN-64-STATIC-LABEL: foo04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -666,8 +666,8 @@ define void @foo05() nounwind { ; LINUX-64-STATIC-LABEL: foo05: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl dsrc(%rip), %eax -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -687,9 +687,9 @@ ; ; LINUX-64-PIC-LABEL: foo05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax), %eax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq @@ -720,22 +720,22 @@ ; ; DARWIN-64-STATIC-LABEL: foo05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _dsrc(%rip), %eax -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _dsrc(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _dsrc(%rip), %eax -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -749,8 +749,8 @@ define void @foo06() nounwind { ; LINUX-64-STATIC-LABEL: foo06: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl lsrc(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, 
ldst(%rip) +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: foo06: @@ -767,8 +767,8 @@ ; ; LINUX-64-PIC-LABEL: foo06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movl lsrc(%rip), %eax -; LINUX-64-PIC-NEXT: movl %eax, ldst(%rip) +; LINUX-64-PIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-PIC-NEXT: movl %eax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: foo06: @@ -794,20 +794,20 @@ ; ; DARWIN-64-STATIC-LABEL: foo06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _lsrc(%rip), %eax -; DARWIN-64-STATIC-NEXT: movl %eax, _ldst(%rip) +; DARWIN-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movl %eax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _lsrc(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst(%rip) +; DARWIN-64-DYNAMIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movl %eax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _lsrc(%rip), %eax -; DARWIN-64-PIC-NEXT: movl %eax, _ldst(%rip) +; DARWIN-64-PIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movl %eax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -819,7 +819,7 @@ define void @foo07() nounwind { ; LINUX-64-STATIC-LABEL: foo07: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $ldst, lptr(%rip) +; LINUX-64-STATIC-NEXT: movq $ldst, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: foo07: @@ -834,8 +834,8 @@ ; ; LINUX-64-PIC-LABEL: foo07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax -; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: foo07: @@ -859,20 +859,20 @@ ; ; DARWIN-64-STATIC-LABEL: foo07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -883,8 +883,8 @@ define void @foo08() nounwind { ; LINUX-64-STATIC-LABEL: foo08: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl lsrc(%rip), %eax -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -904,8 +904,8 @@ ; ; LINUX-64-PIC-LABEL: foo08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movl lsrc(%rip), %eax -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-PIC-NEXT: movl {{.*}}(%rip), 
%eax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -935,22 +935,22 @@ ; ; DARWIN-64-STATIC-LABEL: foo08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _lsrc(%rip), %eax -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: foo08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _lsrc(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: foo08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _lsrc(%rip), %eax -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movl {{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -964,8 +964,8 @@ define void @qux00() nounwind { ; LINUX-64-STATIC-LABEL: qux00: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl src+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, dst+64(%rip) +; LINUX-64-STATIC-NEXT: movl src+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, dst+{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qux00: @@ -982,9 +982,9 @@ ; ; LINUX-64-PIC-LABEL: qux00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax), %eax -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1015,25 +1015,25 @@ ; ; DARWIN-64-STATIC-LABEL: qux00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1046,8 +1046,8 @@ define void @qxx00() nounwind { ; LINUX-64-STATIC-LABEL: qxx00: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl xsrc+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, xdst+64(%rip) +; LINUX-64-STATIC-NEXT: movl xsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, xdst+{{.*}}(%rip) ; 
LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qxx00: @@ -1064,9 +1064,9 @@ ; ; LINUX-64-PIC-LABEL: qxx00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax), %eax -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1097,25 +1097,25 @@ ; ; DARWIN-64-STATIC-LABEL: qxx00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qxx00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qxx00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1128,7 +1128,7 @@ define void @qux01() nounwind { ; LINUX-64-STATIC-LABEL: qux01: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $dst+64, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq $dst+64, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qux01: @@ -1143,9 +1143,9 @@ ; ; LINUX-64-PIC-LABEL: qux01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1175,25 +1175,25 @@ ; ; DARWIN-64-STATIC-LABEL: qux01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; 
DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1205,7 +1205,7 @@ define void @qxx01() nounwind { ; LINUX-64-STATIC-LABEL: qxx01: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $xdst+64, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq $xdst+64, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qxx01: @@ -1220,9 +1220,9 @@ ; ; LINUX-64-PIC-LABEL: qxx01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1252,25 +1252,25 @@ ; ; DARWIN-64-STATIC-LABEL: qxx01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qxx01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qxx01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1282,8 +1282,8 @@ define void @qux02() nounwind { ; LINUX-64-STATIC-LABEL: qux02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl src+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl src+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -1303,9 +1303,9 @@ ; ; LINUX-64-PIC-LABEL: qux02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq @@ -1340,27 +1340,27 @@ ; ; DARWIN-64-STATIC-LABEL: qux02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), 
%rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1376,8 +1376,8 @@ define void @qxx02() nounwind { ; LINUX-64-STATIC-LABEL: qxx02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl xsrc+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl xsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -1397,9 +1397,9 @@ ; ; LINUX-64-PIC-LABEL: qxx02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq @@ -1434,27 +1434,27 @@ ; ; DARWIN-64-STATIC-LABEL: qxx02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qxx02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qxx02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1470,8 +1470,8 @@ define void @qux03() nounwind { ; LINUX-64-STATIC-LABEL: qux03: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl dsrc+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, ddst+64(%rip) +; LINUX-64-STATIC-NEXT: movl dsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, ddst+{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qux03: @@ -1488,9 +1488,9 @@ ; ; LINUX-64-PIC-LABEL: qux03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; 
LINUX-64-PIC-NEXT: movl 64(%rax), %eax -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1517,20 +1517,20 @@ ; ; DARWIN-64-STATIC-LABEL: qux03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _dsrc+64(%rip), %eax -; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+64(%rip) +; DARWIN-64-STATIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+64(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+64(%rip) +; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _dsrc+64(%rip), %eax -; DARWIN-64-PIC-NEXT: movl %eax, _ddst+64(%rip) +; DARWIN-64-PIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movl %eax, _ddst+{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -1542,7 +1542,7 @@ define void @qux04() nounwind { ; LINUX-64-STATIC-LABEL: qux04: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $ddst+64, dptr(%rip) +; LINUX-64-STATIC-NEXT: movq $ddst+64, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qux04: @@ -1557,9 +1557,9 @@ ; ; LINUX-64-PIC-LABEL: qux04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1584,20 +1584,20 @@ ; ; DARWIN-64-STATIC-LABEL: qux04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst+64(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-STATIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+64(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst+64(%rip), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-PIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -1608,8 +1608,8 @@ define void @qux05() nounwind { ; LINUX-64-STATIC-LABEL: qux05: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl dsrc+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl dsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -1629,9 +1629,9 @@ ; ; LINUX-64-PIC-LABEL: qux05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax), %eax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq 
dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq @@ -1662,22 +1662,22 @@ ; ; DARWIN-64-STATIC-LABEL: qux05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _dsrc+64(%rip), %eax -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+64(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _dsrc+64(%rip), %eax -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1692,8 +1692,8 @@ define void @qux06() nounwind { ; LINUX-64-STATIC-LABEL: qux06: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl lsrc+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, ldst+64(%rip) +; LINUX-64-STATIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, ldst+{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qux06: @@ -1710,8 +1710,8 @@ ; ; LINUX-64-PIC-LABEL: qux06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movl lsrc+64(%rip), %eax -; LINUX-64-PIC-NEXT: movl %eax, ldst+64(%rip) +; LINUX-64-PIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-PIC-NEXT: movl %eax, ldst+{{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: qux06: @@ -1737,20 +1737,20 @@ ; ; DARWIN-64-STATIC-LABEL: qux06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _lsrc+64(%rip), %eax -; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+64(%rip) +; DARWIN-64-STATIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+64(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+64(%rip) +; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _lsrc+64(%rip), %eax -; DARWIN-64-PIC-NEXT: movl %eax, _ldst+64(%rip) +; DARWIN-64-PIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movl %eax, _ldst+{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -1762,7 +1762,7 @@ define void @qux07() nounwind { ; LINUX-64-STATIC-LABEL: qux07: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $ldst+64, lptr(%rip) +; LINUX-64-STATIC-NEXT: movq $ldst+64, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: qux07: @@ -1777,8 +1777,8 @@ ; ; LINUX-64-PIC-LABEL: qux07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst+64(%rip), %rax -; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-PIC-NEXT: leaq ldst+{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: qux07: 
@@ -1802,20 +1802,20 @@ ; ; DARWIN-64-STATIC-LABEL: qux07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst+64(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-STATIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+64(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst+64(%rip), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-PIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -1826,8 +1826,8 @@ define void @qux08() nounwind { ; LINUX-64-STATIC-LABEL: qux08: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl lsrc+64(%rip), %eax -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -1847,8 +1847,8 @@ ; ; LINUX-64-PIC-LABEL: qux08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movl lsrc+64(%rip), %eax -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-PIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -1878,22 +1878,22 @@ ; ; DARWIN-64-STATIC-LABEL: qux08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _lsrc+64(%rip), %eax -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: qux08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+64(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: qux08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _lsrc+64(%rip), %eax -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -1928,9 +1928,9 @@ ; ; LINUX-64-PIC-LABEL: ind00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -1964,25 +1964,25 @@ ; ; DARWIN-64-STATIC-LABEL: ind00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), 
%rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2017,9 +2017,9 @@ ; ; LINUX-64-PIC-LABEL: ixd00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -2053,25 +2053,25 @@ ; ; DARWIN-64-STATIC-LABEL: ixd00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ixd00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ixd00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2087,7 +2087,7 @@ ; LINUX-64-STATIC-LABEL: ind01: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq dst(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: ind01: @@ -2107,8 +2107,8 @@ ; LINUX-64-PIC-LABEL: ind01: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: shlq $2, %rdi -; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rdi -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq dst@{{.*}}(%rip), %rdi +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq %rdi, (%rax) ; LINUX-64-PIC-NEXT: retq ; @@ -2143,24 +2143,24 @@ ; DARWIN-64-STATIC-LABEL: ind01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: shlq $2, %rdi -; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi -; DARWIN-64-STATIC-NEXT: 
movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq _dst@{{.*}}(%rip), %rdi +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movq %rdi, (%rax) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: shlq $2, %rdi -; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq _dst@{{.*}}(%rip), %rdi +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movq %rdi, (%rax) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind01: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: shlq $2, %rdi -; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rdi -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq _dst@{{.*}}(%rip), %rdi +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movq %rdi, (%rax) ; DARWIN-64-PIC-NEXT: retq @@ -2174,7 +2174,7 @@ ; LINUX-64-STATIC-LABEL: ixd01: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq xdst(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: ixd01: @@ -2194,8 +2194,8 @@ ; LINUX-64-PIC-LABEL: ixd01: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: shlq $2, %rdi -; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rdi -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq xdst@{{.*}}(%rip), %rdi +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq %rdi, (%rax) ; LINUX-64-PIC-NEXT: retq ; @@ -2230,24 +2230,24 @@ ; DARWIN-64-STATIC-LABEL: ixd01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: shlq $2, %rdi -; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq _xdst@{{.*}}(%rip), %rdi +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movq %rdi, (%rax) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ixd01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: shlq $2, %rdi -; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq _xdst@{{.*}}(%rip), %rdi +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movq %rdi, (%rax) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ixd01: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: shlq $2, %rdi -; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rdi -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq _xdst@{{.*}}(%rip), %rdi +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movq %rdi, (%rax) ; DARWIN-64-PIC-NEXT: retq @@ -2261,7 +2261,7 @@ ; LINUX-64-STATIC-LABEL: ind02: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl src(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -2283,9 +2283,9 @@ ; ; LINUX-64-PIC-LABEL: ind02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx 
+; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -2323,27 +2323,27 @@ ; ; DARWIN-64-STATIC-LABEL: ind02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2361,7 +2361,7 @@ ; LINUX-64-STATIC-LABEL: ixd02: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl xsrc(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -2383,9 +2383,9 @@ ; ; LINUX-64-PIC-LABEL: ixd02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -2423,27 +2423,27 @@ ; ; DARWIN-64-STATIC-LABEL: ixd02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ixd02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ixd02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; 
DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2480,9 +2480,9 @@ ; ; LINUX-64-PIC-LABEL: ind03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -2512,25 +2512,25 @@ ; ; DARWIN-64-STATIC-LABEL: ind03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2546,7 +2546,7 @@ ; LINUX-64-STATIC-LABEL: ind04: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq ddst(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, dptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: ind04: @@ -2566,8 +2566,8 @@ ; LINUX-64-PIC-LABEL: ind04: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: shlq $2, %rdi -; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rdi -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq ddst@{{.*}}(%rip), %rdi +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq %rdi, (%rax) ; LINUX-64-PIC-NEXT: retq ; @@ -2597,23 +2597,23 @@ ; ; DARWIN-64-STATIC-LABEL: ind04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq (%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq (%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip) +; 
DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -2626,7 +2626,7 @@ ; LINUX-64-STATIC-LABEL: ind05: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl dsrc(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -2648,9 +2648,9 @@ ; ; LINUX-64-PIC-LABEL: ind05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -2684,25 +2684,25 @@ ; ; DARWIN-64-STATIC-LABEL: ind05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2738,9 +2738,9 @@ ; ; LINUX-64-PIC-LABEL: ind06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -2770,25 +2770,25 @@ ; ; DARWIN-64-STATIC-LABEL: ind06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx +; 
DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2804,7 +2804,7 @@ ; LINUX-64-STATIC-LABEL: ind07: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq ldst(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: ind07: @@ -2823,9 +2823,9 @@ ; ; LINUX-64-PIC-LABEL: ind07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: ind07: @@ -2854,23 +2854,23 @@ ; ; DARWIN-64-STATIC-LABEL: ind07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq (%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq (%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq (%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -2883,7 +2883,7 @@ ; LINUX-64-STATIC-LABEL: ind08: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl lsrc(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -2905,9 +2905,9 @@ ; ; LINUX-64-PIC-LABEL: ind08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -2940,25 +2940,25 @@ ; ; DARWIN-64-STATIC-LABEL: ind08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ind08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ind08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq 
{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl (%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, (%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -2994,9 +2994,9 @@ ; ; LINUX-64-PIC-LABEL: off00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -3030,25 +3030,25 @@ ; ; DARWIN-64-STATIC-LABEL: off00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3084,9 +3084,9 @@ ; ; LINUX-64-PIC-LABEL: oxf00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -3120,25 +3120,25 @@ ; ; DARWIN-64-STATIC-LABEL: oxf00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: oxf00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: oxf00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), 
%rcx +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3155,7 +3155,7 @@ ; LINUX-64-STATIC-LABEL: off01: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq dst+64(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: off01: @@ -3174,9 +3174,9 @@ ; ; LINUX-64-PIC-LABEL: off01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -3210,25 +3210,25 @@ ; ; DARWIN-64-STATIC-LABEL: off01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -3243,7 +3243,7 @@ ; LINUX-64-STATIC-LABEL: oxf01: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq xdst+64(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: oxf01: @@ -3262,9 +3262,9 @@ ; ; LINUX-64-PIC-LABEL: oxf01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -3298,25 +3298,25 @@ ; ; DARWIN-64-STATIC-LABEL: oxf01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: oxf01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq 
_ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: oxf01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -3331,7 +3331,7 @@ ; LINUX-64-STATIC-LABEL: off02: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl src+64(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -3353,9 +3353,9 @@ ; ; LINUX-64-PIC-LABEL: off02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -3393,27 +3393,27 @@ ; ; DARWIN-64-STATIC-LABEL: off02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3432,7 +3432,7 @@ ; LINUX-64-STATIC-LABEL: oxf02: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl xsrc+64(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -3454,9 +3454,9 @@ ; ; LINUX-64-PIC-LABEL: oxf02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -3494,27 +3494,27 
@@ ; ; DARWIN-64-STATIC-LABEL: oxf02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: oxf02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: oxf02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3552,9 +3552,9 @@ ; ; LINUX-64-PIC-LABEL: off03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -3584,25 +3584,25 @@ ; ; DARWIN-64-STATIC-LABEL: off03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3619,7 +3619,7 @@ ; LINUX-64-STATIC-LABEL: off04: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq ddst+64(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, dptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: off04: @@ -3638,9 +3638,9 @@ ; ; LINUX-64-PIC-LABEL: off04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), 
%rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -3670,23 +3670,23 @@ ; ; DARWIN-64-STATIC-LABEL: off04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -3700,7 +3700,7 @@ ; LINUX-64-STATIC-LABEL: off05: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl dsrc+64(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -3722,9 +3722,9 @@ ; ; LINUX-64-PIC-LABEL: off05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -3758,25 +3758,25 @@ ; ; DARWIN-64-STATIC-LABEL: off05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3813,9 +3813,9 @@ ; ; LINUX-64-PIC-LABEL: off06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: 
leaq ldst(%rip), %rcx +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -3845,25 +3845,25 @@ ; ; DARWIN-64-STATIC-LABEL: off06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -3880,7 +3880,7 @@ ; LINUX-64-STATIC-LABEL: off07: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq ldst+64(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: off07: @@ -3899,9 +3899,9 @@ ; ; LINUX-64-PIC-LABEL: off07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: off07: @@ -3930,23 +3930,23 @@ ; ; DARWIN-64-STATIC-LABEL: off07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -3960,7 +3960,7 @@ ; LINUX-64-STATIC-LABEL: off08: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl lsrc+64(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -3982,9 +3982,9 @@ ; ; LINUX-64-PIC-LABEL: off08: ; LINUX-64-PIC: # %bb.0: # %entry -; 
LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -4017,25 +4017,25 @@ ; ; DARWIN-64-STATIC-LABEL: off08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: off08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: off08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 64(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 64(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -4052,8 +4052,8 @@ define void @moo00(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo00: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl src+262144(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, dst+262144(%rip) +; LINUX-64-STATIC-NEXT: movl src+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, dst+{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: moo00: @@ -4070,9 +4070,9 @@ ; ; LINUX-64-PIC-LABEL: moo00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -4103,25 +4103,25 @@ ; ; DARWIN-64-STATIC-LABEL: moo00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax), %eax -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rcx 
; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -4134,7 +4134,7 @@ define void @moo01(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo01: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $dst+262144, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq $dst+262144, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: moo01: @@ -4150,8 +4150,8 @@ ; LINUX-64-PIC-LABEL: moo01: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: addq dst@{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -4182,24 +4182,24 @@ ; DARWIN-64-STATIC-LABEL: moo01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: addq _dst@{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: addq _dst@{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo01: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: addq _dst@{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -4211,8 +4211,8 @@ define void @moo02(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl src+262144(%rip), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl src+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -4232,9 +4232,9 @@ ; ; LINUX-64-PIC-LABEL: moo02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-PIC-NEXT: retq @@ -4269,27 +4269,27 @@ ; ; DARWIN-64-STATIC-LABEL: moo02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq 
_src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -4305,8 +4305,8 @@ define void @moo03(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo03: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl dsrc+262144(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, ddst+262144(%rip) +; LINUX-64-STATIC-NEXT: movl dsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, ddst+{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: moo03: @@ -4323,9 +4323,9 @@ ; ; LINUX-64-PIC-LABEL: moo03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -4352,20 +4352,20 @@ ; ; DARWIN-64-STATIC-LABEL: moo03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _dsrc+262144(%rip), %eax -; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+262144(%rip) +; DARWIN-64-STATIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movl %eax, _ddst+{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+262144(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+262144(%rip) +; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ddst+{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _dsrc+262144(%rip), %eax -; DARWIN-64-PIC-NEXT: movl %eax, _ddst+262144(%rip) +; DARWIN-64-PIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movl %eax, _ddst+{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -4377,7 +4377,7 @@ define void @moo04(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo04: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $ddst+262144, dptr(%rip) +; LINUX-64-STATIC-NEXT: movq $ddst+262144, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: moo04: @@ -4393,8 +4393,8 @@ ; LINUX-64-PIC-LABEL: moo04: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: addq ddst@{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -4419,20 +4419,20 @@ ; ; DARWIN-64-STATIC-LABEL: moo04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst+262144(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, 
_dptr(%rip) +; DARWIN-64-STATIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+262144(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst+262144(%rip), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-PIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -4443,8 +4443,8 @@ define void @moo05(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo05: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl dsrc+262144(%rip), %eax -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl dsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -4464,9 +4464,9 @@ ; ; LINUX-64-PIC-LABEL: moo05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax), %eax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-PIC-NEXT: retq @@ -4497,22 +4497,22 @@ ; ; DARWIN-64-STATIC-LABEL: moo05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _dsrc+262144(%rip), %eax -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+262144(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _dsrc+262144(%rip), %eax -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movl _dsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -4527,8 +4527,8 @@ define void @moo06(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo06: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl lsrc+262144(%rip), %eax -; LINUX-64-STATIC-NEXT: movl %eax, ldst+262144(%rip) +; LINUX-64-STATIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movl %eax, ldst+{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: moo06: @@ -4545,8 +4545,8 @@ ; ; LINUX-64-PIC-LABEL: moo06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movl lsrc+262144(%rip), %eax -; LINUX-64-PIC-NEXT: movl %eax, ldst+262144(%rip) +; LINUX-64-PIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-PIC-NEXT: movl %eax, ldst+{{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: moo06: @@ -4572,20 +4572,20 @@ ; ; DARWIN-64-STATIC-LABEL: 
moo06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _lsrc+262144(%rip), %eax -; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+262144(%rip) +; DARWIN-64-STATIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movl %eax, _ldst+{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+262144(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+262144(%rip) +; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movl %eax, _ldst+{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _lsrc+262144(%rip), %eax -; DARWIN-64-PIC-NEXT: movl %eax, _ldst+262144(%rip) +; DARWIN-64-PIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movl %eax, _ldst+{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -4597,7 +4597,7 @@ define void @moo07(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo07: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq $ldst+262144, lptr(%rip) +; LINUX-64-STATIC-NEXT: movq $ldst+262144, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: moo07: @@ -4612,8 +4612,8 @@ ; ; LINUX-64-PIC-LABEL: moo07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst+262144(%rip), %rax -; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-PIC-NEXT: leaq ldst+{{.*}}(%rip), %rax +; LINUX-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: moo07: @@ -4637,20 +4637,20 @@ ; ; DARWIN-64-STATIC-LABEL: moo07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst+262144(%rip), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-STATIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+262144(%rip), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst+262144(%rip), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-PIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -4661,8 +4661,8 @@ define void @moo08(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: moo08: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movl lsrc+262144(%rip), %eax -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-STATIC-NEXT: retq ; @@ -4682,8 +4682,8 @@ ; ; LINUX-64-PIC-LABEL: moo08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movl lsrc+262144(%rip), %eax -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-PIC-NEXT: movl lsrc+{{.*}}(%rip), %eax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -4713,22 +4713,22 @@ ; ; DARWIN-64-STATIC-LABEL: moo08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movl _lsrc+262144(%rip), %eax -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: 
movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: moo08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+262144(%rip), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: moo08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movl _lsrc+262144(%rip), %eax -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movl _lsrc+{{.*}}(%rip), %eax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -4763,9 +4763,9 @@ ; ; LINUX-64-PIC-LABEL: big00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -4799,25 +4799,25 @@ ; ; DARWIN-64-STATIC-LABEL: big00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -4834,7 +4834,7 @@ ; LINUX-64-STATIC-LABEL: big01: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq dst+262144(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, ptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: big01: @@ -4853,9 +4853,9 @@ ; ; LINUX-64-PIC-LABEL: big01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -4889,25 +4889,25 @@ ; ; DARWIN-64-STATIC-LABEL: big01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; 
DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq %rax, (%rcx) ; DARWIN-64-PIC-NEXT: retq @@ -4922,7 +4922,7 @@ ; LINUX-64-STATIC-LABEL: big02: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl src+262144(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -4944,9 +4944,9 @@ ; ; LINUX-64-PIC-LABEL: big02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -4984,27 +4984,27 @@ ; ; DARWIN-64-STATIC-LABEL: big02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movq (%rcx), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -5042,9 +5042,9 @@ ; ; LINUX-64-PIC-LABEL: big03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 
262144(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -5074,25 +5074,25 @@ ; ; DARWIN-64-STATIC-LABEL: big03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rcx +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -5109,7 +5109,7 @@ ; LINUX-64-STATIC-LABEL: big04: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq ddst+262144(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, dptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: big04: @@ -5128,9 +5128,9 @@ ; ; LINUX-64-PIC-LABEL: big04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq %rax, (%rcx) ; LINUX-64-PIC-NEXT: retq ; @@ -5160,23 +5160,23 @@ ; ; DARWIN-64-STATIC-LABEL: big04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _dptr(%rip) +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -5190,7 +5190,7 @@ ; LINUX-64-STATIC-LABEL: big05: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl dsrc+262144(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 
262144(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -5212,9 +5212,9 @@ ; ; LINUX-64-PIC-LABEL: big05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rcx +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movq (%rcx), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq @@ -5248,25 +5248,25 @@ ; ; DARWIN-64-STATIC-LABEL: big05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -5303,9 +5303,9 @@ ; ; LINUX-64-PIC-LABEL: big06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rcx +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -5335,25 +5335,25 @@ ; ; DARWIN-64-STATIC-LABEL: big06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rcx +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -5370,7 +5370,7 @@ ; LINUX-64-STATIC-LABEL: big07: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: leaq 
ldst+262144(,%rdi,4), %rax -; LINUX-64-STATIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: big07: @@ -5389,9 +5389,9 @@ ; ; LINUX-64-PIC-LABEL: big07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; LINUX-64-PIC-NEXT: movq %rax, lptr(%rip) +; LINUX-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: big07: @@ -5420,23 +5420,23 @@ ; ; DARWIN-64-STATIC-LABEL: big07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-STATIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-STATIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-DYNAMIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-DYNAMIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax -; DARWIN-64-PIC-NEXT: movq %rax, _lptr(%rip) +; DARWIN-64-PIC-NEXT: movq %rax, {{.*}}(%rip) ; DARWIN-64-PIC-NEXT: retq entry: @@ -5450,7 +5450,7 @@ ; LINUX-64-STATIC-LABEL: big08: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl lsrc+262144(,%rdi,4), %eax -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-STATIC-NEXT: retq ; @@ -5472,9 +5472,9 @@ ; ; LINUX-64-PIC-LABEL: big08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rcx +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rcx ; LINUX-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; LINUX-64-PIC-NEXT: retq ; @@ -5507,25 +5507,25 @@ ; ; DARWIN-64-STATIC-LABEL: big08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-STATIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: big08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rcx ; DARWIN-64-DYNAMIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: big08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movl 262144(%rax,%rdi,4), %eax -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rcx +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), 
%rcx ; DARWIN-64-PIC-NEXT: movl %eax, 262144(%rcx,%rdi,4) ; DARWIN-64-PIC-NEXT: retq @@ -5557,7 +5557,7 @@ ; ; LINUX-64-PIC-LABEL: bar00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar00: @@ -5580,17 +5580,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5615,7 +5615,7 @@ ; ; LINUX-64-PIC-LABEL: bxr00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bxr00: @@ -5638,17 +5638,17 @@ ; ; DARWIN-64-STATIC-LABEL: bxr00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bxr00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bxr00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5673,7 +5673,7 @@ ; ; LINUX-64-PIC-LABEL: bar01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar01: @@ -5696,17 +5696,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5731,7 +5731,7 @@ ; ; LINUX-64-PIC-LABEL: bxr01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bxr01: @@ -5754,17 +5754,17 @@ ; ; DARWIN-64-STATIC-LABEL: bxr01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bxr01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; 
DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bxr01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5789,7 +5789,7 @@ ; ; LINUX-64-PIC-LABEL: bar02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar02: @@ -5812,17 +5812,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5847,7 +5847,7 @@ ; ; LINUX-64-PIC-LABEL: bar03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar03: @@ -5870,17 +5870,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5905,7 +5905,7 @@ ; ; LINUX-64-PIC-LABEL: bar04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar04: @@ -5928,17 +5928,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -5963,7 +5963,7 @@ ; ; LINUX-64-PIC-LABEL: bar05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar05: @@ -5986,17 +5986,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dptr(%rip), %rax +; 
DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6021,7 +6021,7 @@ ; ; LINUX-64-PIC-LABEL: bar06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar06: @@ -6044,17 +6044,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6079,7 +6079,7 @@ ; ; LINUX-64-PIC-LABEL: bar07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar07: @@ -6102,17 +6102,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6137,7 +6137,7 @@ ; ; LINUX-64-PIC-LABEL: bar08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bar08: @@ -6160,17 +6160,17 @@ ; ; DARWIN-64-STATIC-LABEL: bar08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bar08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bar08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6195,7 +6195,7 @@ ; ; LINUX-64-PIC-LABEL: har00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har00: @@ -6218,17 +6218,17 @@ ; ; DARWIN-64-STATIC-LABEL: har00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq 
_src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6253,7 +6253,7 @@ ; ; LINUX-64-PIC-LABEL: hxr00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: hxr00: @@ -6276,17 +6276,17 @@ ; ; DARWIN-64-STATIC-LABEL: hxr00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: hxr00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: hxr00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6311,7 +6311,7 @@ ; ; LINUX-64-PIC-LABEL: har01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har01: @@ -6334,17 +6334,17 @@ ; ; DARWIN-64-STATIC-LABEL: har01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6369,7 +6369,7 @@ ; ; LINUX-64-PIC-LABEL: hxr01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: hxr01: @@ -6392,17 +6392,17 @@ ; ; DARWIN-64-STATIC-LABEL: hxr01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: hxr01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: hxr01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6412,7 +6412,7 @@ define i8* @har02() nounwind { ; LINUX-64-STATIC-LABEL: har02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: har02: @@ -6427,7 +6427,7 @@ ; ; LINUX-64-PIC-LABEL: har02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax 
; LINUX-64-PIC-NEXT: retq ; @@ -6453,19 +6453,19 @@ ; ; DARWIN-64-STATIC-LABEL: har02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movq (%rax), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movq (%rax), %rax ; DARWIN-64-PIC-NEXT: retq @@ -6493,7 +6493,7 @@ ; ; LINUX-64-PIC-LABEL: har03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har03: @@ -6516,17 +6516,17 @@ ; ; DARWIN-64-STATIC-LABEL: har03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6551,7 +6551,7 @@ ; ; LINUX-64-PIC-LABEL: har04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har04: @@ -6574,17 +6574,17 @@ ; ; DARWIN-64-STATIC-LABEL: har04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6594,7 +6594,7 @@ define i8* @har05() nounwind { ; LINUX-64-STATIC-LABEL: har05: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: har05: @@ -6609,7 +6609,7 @@ ; ; LINUX-64-PIC-LABEL: har05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -6633,17 +6633,17 @@ ; ; DARWIN-64-STATIC-LABEL: har05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; 
DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6670,7 +6670,7 @@ ; ; LINUX-64-PIC-LABEL: har06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har06: @@ -6693,17 +6693,17 @@ ; ; DARWIN-64-STATIC-LABEL: har06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6728,7 +6728,7 @@ ; ; LINUX-64-PIC-LABEL: har07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har07: @@ -6751,17 +6751,17 @@ ; ; DARWIN-64-STATIC-LABEL: har07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6771,7 +6771,7 @@ define i8* @har08() nounwind { ; LINUX-64-STATIC-LABEL: har08: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: har08: @@ -6786,7 +6786,7 @@ ; ; LINUX-64-PIC-LABEL: har08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: har08: @@ -6809,17 +6809,17 @@ ; ; DARWIN-64-STATIC-LABEL: har08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: har08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: har08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -6846,7 +6846,7 @@ ; ; LINUX-64-PIC-LABEL: bat00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -6872,19 +6872,19 @@ ; ; DARWIN-64-STATIC-LABEL: bat00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: 
movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -6910,7 +6910,7 @@ ; ; LINUX-64-PIC-LABEL: bxt00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -6936,19 +6936,19 @@ ; ; DARWIN-64-STATIC-LABEL: bxt00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bxt00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bxt00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -6974,7 +6974,7 @@ ; ; LINUX-64-PIC-LABEL: bat01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -7000,19 +7000,19 @@ ; ; DARWIN-64-STATIC-LABEL: bat01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -7038,7 +7038,7 @@ ; ; LINUX-64-PIC-LABEL: bxt01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -7064,19 +7064,19 @@ ; ; DARWIN-64-STATIC-LABEL: bxt01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bxt01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bxt01: ; DARWIN-64-PIC: ## %bb.0: ## %entry 
-; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -7087,7 +7087,7 @@ define i8* @bat02() nounwind { ; LINUX-64-STATIC-LABEL: bat02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: addq $64, %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -7105,7 +7105,7 @@ ; ; LINUX-64-PIC-LABEL: bat02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq @@ -7135,21 +7135,21 @@ ; ; DARWIN-64-STATIC-LABEL: bat02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movq (%rax), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movq (%rax), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -7179,7 +7179,7 @@ ; ; LINUX-64-PIC-LABEL: bat03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -7203,17 +7203,17 @@ ; ; DARWIN-64-STATIC-LABEL: bat03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _dsrc+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc+64(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _dsrc+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7238,7 +7238,7 @@ ; ; LINUX-64-PIC-LABEL: bat04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -7262,17 +7262,17 @@ ; ; DARWIN-64-STATIC-LABEL: bat04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst+64(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7282,7 +7282,7 @@ define i8* @bat05() nounwind { ; 
LINUX-64-STATIC-LABEL: bat05: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: addq $64, %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -7300,7 +7300,7 @@ ; ; LINUX-64-PIC-LABEL: bat05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq @@ -7328,19 +7328,19 @@ ; ; DARWIN-64-STATIC-LABEL: bat05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -7369,7 +7369,7 @@ ; ; LINUX-64-PIC-LABEL: bat06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc+64(%rip), %rax +; LINUX-64-PIC-NEXT: leaq lsrc+{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bat06: @@ -7392,17 +7392,17 @@ ; ; DARWIN-64-STATIC-LABEL: bat06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _lsrc+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc+64(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _lsrc+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7427,7 +7427,7 @@ ; ; LINUX-64-PIC-LABEL: bat07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst+64(%rip), %rax +; LINUX-64-PIC-NEXT: leaq ldst+{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bat07: @@ -7450,17 +7450,17 @@ ; ; DARWIN-64-STATIC-LABEL: bat07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst+64(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+64(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst+64(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7470,7 +7470,7 @@ define i8* @bat08() nounwind { ; LINUX-64-STATIC-LABEL: bat08: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: addq $64, %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -7488,7 +7488,7 @@ ; ; LINUX-64-PIC-LABEL: bat08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; 
LINUX-64-PIC-NEXT: addq $64, %rax ; LINUX-64-PIC-NEXT: retq ; @@ -7515,19 +7515,19 @@ ; ; DARWIN-64-STATIC-LABEL: bat08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: addq $64, %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bat08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: addq $64, %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bat08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: addq $64, %rax ; DARWIN-64-PIC-NEXT: retq @@ -7557,7 +7557,7 @@ ; LINUX-64-PIC-LABEL: bam00: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam00: @@ -7583,19 +7583,19 @@ ; DARWIN-64-STATIC-LABEL: bam00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-STATIC-NEXT: addq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-DYNAMIC-NEXT: addq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam00: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-PIC-NEXT: addq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7621,7 +7621,7 @@ ; LINUX-64-PIC-LABEL: bam01: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam01: @@ -7647,19 +7647,19 @@ ; DARWIN-64-STATIC-LABEL: bam01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-STATIC-NEXT: addq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-DYNAMIC-NEXT: addq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam01: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-PIC-NEXT: addq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7685,7 +7685,7 @@ ; LINUX-64-PIC-LABEL: bxm01: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bxm01: @@ -7711,19 +7711,19 @@ ; DARWIN-64-STATIC-LABEL: bxm01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: movl 
$262144, %eax ## imm = 0x40000 -; DARWIN-64-STATIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bxm01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-DYNAMIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bxm01: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-PIC-NEXT: addq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: addq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7734,7 +7734,7 @@ ; LINUX-64-STATIC-LABEL: bam02: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-STATIC-NEXT: addq ptr(%rip), %rax +; LINUX-64-STATIC-NEXT: addq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: bam02: @@ -7826,7 +7826,7 @@ ; LINUX-64-PIC-LABEL: bam03: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam03: @@ -7849,17 +7849,17 @@ ; ; DARWIN-64-STATIC-LABEL: bam03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _dsrc+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _dsrc+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7885,7 +7885,7 @@ ; LINUX-64-PIC-LABEL: bam04: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: addq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam04: @@ -7908,17 +7908,17 @@ ; ; DARWIN-64-STATIC-LABEL: bam04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _ddst+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -7929,7 +7929,7 @@ ; LINUX-64-STATIC-LABEL: bam05: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-STATIC-NEXT: addq dptr(%rip), %rax +; LINUX-64-STATIC-NEXT: addq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: bam05: @@ -7975,19 +7975,19 @@ ; DARWIN-64-STATIC-LABEL: bam05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-STATIC-NEXT: addq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq {{.*}}(%rip), 
%rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-DYNAMIC-NEXT: addq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam05: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-PIC-NEXT: addq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: addq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -8015,7 +8015,7 @@ ; ; LINUX-64-PIC-LABEL: bam06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc+262144(%rip), %rax +; LINUX-64-PIC-NEXT: leaq lsrc+{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam06: @@ -8038,17 +8038,17 @@ ; ; DARWIN-64-STATIC-LABEL: bam06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _lsrc+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _lsrc+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -8073,7 +8073,7 @@ ; ; LINUX-64-PIC-LABEL: bam07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst+262144(%rip), %rax +; LINUX-64-PIC-NEXT: leaq ldst+{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam07: @@ -8096,17 +8096,17 @@ ; ; DARWIN-64-STATIC-LABEL: bam07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst+262144(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+262144(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst+262144(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq _ldst+{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -8117,7 +8117,7 @@ ; LINUX-64-STATIC-LABEL: bam08: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-STATIC-NEXT: addq lptr(%rip), %rax +; LINUX-64-STATIC-NEXT: addq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: retq ; ; LINUX-32-STATIC-LABEL: bam08: @@ -8135,7 +8135,7 @@ ; LINUX-64-PIC-LABEL: bam08: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: movl $262144, %eax # imm = 0x40000 -; LINUX-64-PIC-NEXT: addq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: addq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: bam08: @@ -8162,19 +8162,19 @@ ; DARWIN-64-STATIC-LABEL: bam08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-STATIC-NEXT: addq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: addq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: bam08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-DYNAMIC-NEXT: addq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: addq {{.*}}(%rip), %rax ; 
DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: bam08: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: movl $262144, %eax ## imm = 0x40000 -; DARWIN-64-PIC-NEXT: addq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: addq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -8204,7 +8204,7 @@ ; ; LINUX-64-PIC-LABEL: cat00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8233,19 +8233,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8276,7 +8276,7 @@ ; ; LINUX-64-PIC-LABEL: cxt00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8305,19 +8305,19 @@ ; ; DARWIN-64-STATIC-LABEL: cxt00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cxt00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cxt00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8348,7 +8348,7 @@ ; ; LINUX-64-PIC-LABEL: cat01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8377,19 +8377,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8420,7 
+8420,7 @@ ; ; LINUX-64-PIC-LABEL: cxt01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8449,19 +8449,19 @@ ; ; DARWIN-64-STATIC-LABEL: cxt01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cxt01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cxt01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8475,7 +8475,7 @@ define i8* @cat02(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: cat02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -8495,7 +8495,7 @@ ; ; LINUX-64-PIC-LABEL: cat02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq @@ -8528,21 +8528,21 @@ ; ; DARWIN-64-STATIC-LABEL: cat02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movq (%rax), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movq (%rax), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8575,7 +8575,7 @@ ; ; LINUX-64-PIC-LABEL: cat03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8602,19 +8602,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), 
%rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8645,7 +8645,7 @@ ; ; LINUX-64-PIC-LABEL: cat04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8672,19 +8672,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8698,7 +8698,7 @@ define i8* @cat05(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: cat05: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -8718,7 +8718,7 @@ ; ; LINUX-64-PIC-LABEL: cat05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq @@ -8749,19 +8749,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8793,7 +8793,7 @@ ; ; LINUX-64-PIC-LABEL: cat06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8820,19 +8820,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 
64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8863,7 +8863,7 @@ ; ; LINUX-64-PIC-LABEL: cat07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8890,19 +8890,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -8916,7 +8916,7 @@ define i8* @cat08(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: cat08: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -8936,7 +8936,7 @@ ; ; LINUX-64-PIC-LABEL: cat08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -8966,19 +8966,19 @@ ; ; DARWIN-64-STATIC-LABEL: cat08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cat08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cat08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 64(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9010,7 +9010,7 @@ ; ; LINUX-64-PIC-LABEL: cam00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq src@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq src@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9039,19 +9039,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _src@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _src@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9082,7 +9082,7 @@ ; ; 
LINUX-64-PIC-LABEL: cxm00: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9111,19 +9111,19 @@ ; ; DARWIN-64-STATIC-LABEL: cxm00: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cxm00: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cxm00: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xsrc@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xsrc@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9154,7 +9154,7 @@ ; ; LINUX-64-PIC-LABEL: cam01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9183,19 +9183,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _dst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9226,7 +9226,7 @@ ; ; LINUX-64-PIC-LABEL: cxm01: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq xdst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq xdst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9255,19 +9255,19 @@ ; ; DARWIN-64-STATIC-LABEL: cxm01: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cxm01: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cxm01: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _xdst@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _xdst@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9281,7 +9281,7 @@ define i8* @cam02(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: cam02: ; LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq ptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: leaq 
262144(%rax,%rdi,4), %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -9301,7 +9301,7 @@ ; ; LINUX-64-PIC-LABEL: cam02: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq @@ -9334,21 +9334,21 @@ ; ; DARWIN-64-STATIC-LABEL: cam02: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: movq (%rax), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam02: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: movq (%rax), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam02: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _ptr@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _ptr@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: movq (%rax), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9381,7 +9381,7 @@ ; ; LINUX-64-PIC-LABEL: cam03: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dsrc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dsrc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9408,19 +9408,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam03: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam03: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam03: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9451,7 +9451,7 @@ ; ; LINUX-64-PIC-LABEL: cam04: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq ddst@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq ddst@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9478,19 +9478,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam04: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam04: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam04: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ddst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9504,7 +9504,7 @@ define i8* @cam05(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: cam05: ; 
LINUX-64-STATIC: # %bb.0: # %entry -; LINUX-64-STATIC-NEXT: movq dptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -9524,7 +9524,7 @@ ; ; LINUX-64-PIC-LABEL: cam05: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq dptr@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq dptr@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: movq (%rax), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq @@ -9555,19 +9555,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam05: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam05: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam05: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _dptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9599,7 +9599,7 @@ ; ; LINUX-64-PIC-LABEL: cam06: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq lsrc(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9626,19 +9626,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam06: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam06: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam06: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lsrc(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9669,7 +9669,7 @@ ; ; LINUX-64-PIC-LABEL: cam07: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq ldst(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9696,19 +9696,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam07: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam07: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam07: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _ldst(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -9722,7 +9722,7 @@ define i8* @cam08(i64 %i) nounwind { ; LINUX-64-STATIC-LABEL: cam08: ; LINUX-64-STATIC: # %bb.0: # %entry -; 
LINUX-64-STATIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -9742,7 +9742,7 @@ ; ; LINUX-64-PIC-LABEL: cam08: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq lptr(%rip), %rax +; LINUX-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; LINUX-64-PIC-NEXT: retq ; @@ -9772,19 +9772,19 @@ ; ; DARWIN-64-STATIC-LABEL: cam08: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: cam08: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: cam08: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _lptr(%rip), %rax +; DARWIN-64-PIC-NEXT: movq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: leaq 262144(%rax,%rdi,4), %rax ; DARWIN-64-PIC-NEXT: retq @@ -10102,7 +10102,7 @@ ; ; LINUX-64-PIC-LABEL: address: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq callee@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq callee@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: address: @@ -10125,17 +10125,17 @@ ; ; DARWIN-64-STATIC-LABEL: address: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: movq _callee@GOTPCREL(%rip), %rax +; DARWIN-64-STATIC-NEXT: movq _callee@{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: address: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: movq _callee@GOTPCREL(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: movq _callee@{{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: address: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: movq _callee@GOTPCREL(%rip), %rax +; DARWIN-64-PIC-NEXT: movq _callee@{{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -10162,7 +10162,7 @@ ; ; LINUX-64-PIC-LABEL: laddress: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: movq lcallee@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq lcallee@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: laddress: @@ -10185,17 +10185,17 @@ ; ; DARWIN-64-STATIC-LABEL: laddress: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _lcallee(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: laddress: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _lcallee(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: laddress: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _lcallee(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -10220,7 +10220,7 @@ ; ; LINUX-64-PIC-LABEL: daddress: ; LINUX-64-PIC: # %bb.0: # %entry -; LINUX-64-PIC-NEXT: leaq dcallee(%rip), %rax +; LINUX-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: retq ; ; DARWIN-32-STATIC-LABEL: daddress: @@ -10243,17 +10243,17 @@ ; ; DARWIN-64-STATIC-LABEL: daddress: ; DARWIN-64-STATIC: ## %bb.0: ## %entry -; DARWIN-64-STATIC-NEXT: leaq _dcallee(%rip), %rax +; DARWIN-64-STATIC-NEXT: leaq 
{{.*}}(%rip), %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: daddress: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry -; DARWIN-64-DYNAMIC-NEXT: leaq _dcallee(%rip), %rax +; DARWIN-64-DYNAMIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: daddress: ; DARWIN-64-PIC: ## %bb.0: ## %entry -; DARWIN-64-PIC-NEXT: leaq _dcallee(%rip), %rax +; DARWIN-64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; DARWIN-64-PIC-NEXT: retq entry: @@ -10753,8 +10753,8 @@ ; LINUX-64-STATIC-LABEL: icaller: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: pushq %rax -; LINUX-64-STATIC-NEXT: callq *ifunc(%rip) -; LINUX-64-STATIC-NEXT: callq *ifunc(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: popq %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -10777,7 +10777,7 @@ ; LINUX-64-PIC-LABEL: icaller: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: pushq %rbx -; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), %rbx +; LINUX-64-PIC-NEXT: movq ifunc@{{.*}}(%rip), %rbx ; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: popq %rbx @@ -10819,7 +10819,7 @@ ; DARWIN-64-STATIC-LABEL: icaller: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: pushq %rbx -; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx +; DARWIN-64-STATIC-NEXT: movq _ifunc@{{.*}}(%rip), %rbx ; DARWIN-64-STATIC-NEXT: callq *(%rbx) ; DARWIN-64-STATIC-NEXT: callq *(%rbx) ; DARWIN-64-STATIC-NEXT: popq %rbx @@ -10828,7 +10828,7 @@ ; DARWIN-64-DYNAMIC-LABEL: icaller: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: pushq %rbx -; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx +; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@{{.*}}(%rip), %rbx ; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) ; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) ; DARWIN-64-DYNAMIC-NEXT: popq %rbx @@ -10837,7 +10837,7 @@ ; DARWIN-64-PIC-LABEL: icaller: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: pushq %rbx -; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx +; DARWIN-64-PIC-NEXT: movq _ifunc@{{.*}}(%rip), %rbx ; DARWIN-64-PIC-NEXT: callq *(%rbx) ; DARWIN-64-PIC-NEXT: callq *(%rbx) ; DARWIN-64-PIC-NEXT: popq %rbx @@ -10855,8 +10855,8 @@ ; LINUX-64-STATIC-LABEL: dicaller: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: pushq %rax -; LINUX-64-STATIC-NEXT: callq *difunc(%rip) -; LINUX-64-STATIC-NEXT: callq *difunc(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: popq %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -10879,7 +10879,7 @@ ; LINUX-64-PIC-LABEL: dicaller: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: pushq %rbx -; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rbx +; LINUX-64-PIC-NEXT: movq difunc@{{.*}}(%rip), %rbx ; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: popq %rbx @@ -10917,24 +10917,24 @@ ; DARWIN-64-STATIC-LABEL: dicaller: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: pushq %rax -; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) -; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *{{.*}}(%rip) +; DARWIN-64-STATIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: popq %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: dicaller: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: pushq %rax -; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) -; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) +; 
DARWIN-64-DYNAMIC-NEXT: callq *{{.*}}(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: popq %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: dicaller: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: pushq %rax -; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) -; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-PIC-NEXT: callq *{{.*}}(%rip) +; DARWIN-64-PIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: popq %rax ; DARWIN-64-PIC-NEXT: retq @@ -10950,8 +10950,8 @@ ; LINUX-64-STATIC-LABEL: licaller: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: pushq %rax -; LINUX-64-STATIC-NEXT: callq *lifunc(%rip) -; LINUX-64-STATIC-NEXT: callq *lifunc(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: popq %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -10974,8 +10974,8 @@ ; LINUX-64-PIC-LABEL: licaller: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: pushq %rax -; LINUX-64-PIC-NEXT: callq *lifunc(%rip) -; LINUX-64-PIC-NEXT: callq *lifunc(%rip) +; LINUX-64-PIC-NEXT: callq *{{.*}}(%rip) +; LINUX-64-PIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-PIC-NEXT: popq %rax ; LINUX-64-PIC-NEXT: retq ; @@ -11011,24 +11011,24 @@ ; DARWIN-64-STATIC-LABEL: licaller: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: pushq %rax -; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) -; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *{{.*}}(%rip) +; DARWIN-64-STATIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: popq %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: licaller: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: pushq %rax -; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) -; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *{{.*}}(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: popq %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: licaller: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: pushq %rax -; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) -; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: callq *{{.*}}(%rip) +; DARWIN-64-PIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: popq %rax ; DARWIN-64-PIC-NEXT: retq @@ -11044,8 +11044,8 @@ ; LINUX-64-STATIC-LABEL: itailcaller: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: pushq %rax -; LINUX-64-STATIC-NEXT: callq *ifunc(%rip) -; LINUX-64-STATIC-NEXT: callq *ifunc(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: popq %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -11068,7 +11068,7 @@ ; LINUX-64-PIC-LABEL: itailcaller: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: pushq %rbx -; LINUX-64-PIC-NEXT: movq ifunc@GOTPCREL(%rip), %rbx +; LINUX-64-PIC-NEXT: movq ifunc@{{.*}}(%rip), %rbx ; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: callq *(%rbx) ; LINUX-64-PIC-NEXT: popq %rbx @@ -11110,7 +11110,7 @@ ; DARWIN-64-STATIC-LABEL: itailcaller: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: pushq %rbx -; DARWIN-64-STATIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx +; DARWIN-64-STATIC-NEXT: movq _ifunc@{{.*}}(%rip), %rbx ; DARWIN-64-STATIC-NEXT: callq *(%rbx) ; DARWIN-64-STATIC-NEXT: callq *(%rbx) ; DARWIN-64-STATIC-NEXT: popq %rbx @@ -11119,7 +11119,7 @@ ; DARWIN-64-DYNAMIC-LABEL: itailcaller: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: pushq %rbx -; 
DARWIN-64-DYNAMIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx +; DARWIN-64-DYNAMIC-NEXT: movq _ifunc@{{.*}}(%rip), %rbx ; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) ; DARWIN-64-DYNAMIC-NEXT: callq *(%rbx) ; DARWIN-64-DYNAMIC-NEXT: popq %rbx @@ -11128,7 +11128,7 @@ ; DARWIN-64-PIC-LABEL: itailcaller: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: pushq %rbx -; DARWIN-64-PIC-NEXT: movq _ifunc@GOTPCREL(%rip), %rbx +; DARWIN-64-PIC-NEXT: movq _ifunc@{{.*}}(%rip), %rbx ; DARWIN-64-PIC-NEXT: callq *(%rbx) ; DARWIN-64-PIC-NEXT: callq *(%rbx) ; DARWIN-64-PIC-NEXT: popq %rbx @@ -11146,7 +11146,7 @@ ; LINUX-64-STATIC-LABEL: ditailcaller: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: pushq %rax -; LINUX-64-STATIC-NEXT: callq *difunc(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: popq %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -11167,7 +11167,7 @@ ; LINUX-64-PIC-LABEL: ditailcaller: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: pushq %rax -; LINUX-64-PIC-NEXT: movq difunc@GOTPCREL(%rip), %rax +; LINUX-64-PIC-NEXT: movq difunc@{{.*}}(%rip), %rax ; LINUX-64-PIC-NEXT: callq *(%rax) ; LINUX-64-PIC-NEXT: popq %rax ; LINUX-64-PIC-NEXT: retq @@ -11199,21 +11199,21 @@ ; DARWIN-64-STATIC-LABEL: ditailcaller: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: pushq %rax -; DARWIN-64-STATIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: popq %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: ditailcaller: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: pushq %rax -; DARWIN-64-DYNAMIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: popq %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: ditailcaller: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: pushq %rax -; DARWIN-64-PIC-NEXT: callq *_difunc(%rip) +; DARWIN-64-PIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: popq %rax ; DARWIN-64-PIC-NEXT: retq @@ -11227,7 +11227,7 @@ ; LINUX-64-STATIC-LABEL: litailcaller: ; LINUX-64-STATIC: # %bb.0: # %entry ; LINUX-64-STATIC-NEXT: pushq %rax -; LINUX-64-STATIC-NEXT: callq *lifunc(%rip) +; LINUX-64-STATIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-STATIC-NEXT: popq %rax ; LINUX-64-STATIC-NEXT: retq ; @@ -11248,7 +11248,7 @@ ; LINUX-64-PIC-LABEL: litailcaller: ; LINUX-64-PIC: # %bb.0: # %entry ; LINUX-64-PIC-NEXT: pushq %rax -; LINUX-64-PIC-NEXT: callq *lifunc(%rip) +; LINUX-64-PIC-NEXT: callq *{{.*}}(%rip) ; LINUX-64-PIC-NEXT: popq %rax ; LINUX-64-PIC-NEXT: retq ; @@ -11279,21 +11279,21 @@ ; DARWIN-64-STATIC-LABEL: litailcaller: ; DARWIN-64-STATIC: ## %bb.0: ## %entry ; DARWIN-64-STATIC-NEXT: pushq %rax -; DARWIN-64-STATIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-STATIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-STATIC-NEXT: popq %rax ; DARWIN-64-STATIC-NEXT: retq ; ; DARWIN-64-DYNAMIC-LABEL: litailcaller: ; DARWIN-64-DYNAMIC: ## %bb.0: ## %entry ; DARWIN-64-DYNAMIC-NEXT: pushq %rax -; DARWIN-64-DYNAMIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-DYNAMIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-DYNAMIC-NEXT: popq %rax ; DARWIN-64-DYNAMIC-NEXT: retq ; ; DARWIN-64-PIC-LABEL: litailcaller: ; DARWIN-64-PIC: ## %bb.0: ## %entry ; DARWIN-64-PIC-NEXT: pushq %rax -; DARWIN-64-PIC-NEXT: callq *_lifunc(%rip) +; DARWIN-64-PIC-NEXT: callq *{{.*}}(%rip) ; DARWIN-64-PIC-NEXT: popq %rax ; DARWIN-64-PIC-NEXT: retq Index: llvm/test/CodeGen/X86/absolute-bt.ll =================================================================== --- 
llvm/test/CodeGen/X86/absolute-bt.ll +++ llvm/test/CodeGen/X86/absolute-bt.ll @@ -33,8 +33,8 @@ %and = and i64 %load, 63 %shl = shl i64 1, %and %and2 = and i64 %shl, ptrtoint (i8* @bit_mask64 to i64) - ; CHECK: movabsq $bit_mask64, %rax - ; CHECK: btq %rcx, %rax + ; CHECK: movabsq $bit_mask64, %rcx + ; CHECK: btq %rax, %rcx %icmp = icmp eq i64 %and2, 0 br i1 %icmp, label %t, label %f Index: llvm/test/CodeGen/X86/add.ll =================================================================== --- llvm/test/CodeGen/X86/add.ll +++ llvm/test/CodeGen/X86/add.ll @@ -413,17 +413,17 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: subl {{[0-9]+}}(%esp), %edx ; X32-NEXT: xorl %esi, %esi ; X32-NEXT: subl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %edi, %edi ; X32-NEXT: subl {{[0-9]+}}(%esp), %edi -; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, 12(%eax) -; X32-NEXT: movl %edi, 8(%eax) -; X32-NEXT: movl %esi, 4(%eax) -; X32-NEXT: movl %edx, (%eax) +; X32-NEXT: movl %edi, 12(%eax) +; X32-NEXT: movl %esi, 8(%eax) +; X32-NEXT: movl %edx, 4(%eax) +; X32-NEXT: movl %ecx, (%eax) ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: retl $4 Index: llvm/test/CodeGen/X86/anyext.ll =================================================================== --- llvm/test/CodeGen/X86/anyext.ll +++ llvm/test/CodeGen/X86/anyext.ll @@ -42,8 +42,8 @@ ; X64-LABEL: bar: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divw %si ; X64-NEXT: # kill: def $ax killed $ax def $eax ; X64-NEXT: andl $1, %eax Index: llvm/test/CodeGen/X86/atomic-eflags-reuse.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-eflags-reuse.ll +++ llvm/test/CodeGen/X86/atomic-eflags-reuse.ll @@ -228,7 +228,19 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: lock xaddq %rax, (%rdi) -; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: movb $12, %al +; CHECK-NEXT: js .LBB11_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movb $34, %al +; CHECK-NEXT: .LBB11_2: # %entry +; CHECK-NEXT: movb %al, (%rsi) +; CHECK-NEXT: movb $56, %al +; CHECK-NEXT: js .LBB11_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movb $78, %al +; CHECK-NEXT: .LBB11_4: # %entry +; CHECK-NEXT: retq entry: %add = atomicrmw add i64* %p, i64 1 seq_cst %cmp = icmp slt i64 %add, 0 Index: llvm/test/CodeGen/X86/atomic-minmax-i6432.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-minmax-i6432.ll +++ llvm/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -58,7 +58,6 @@ ; PIC-NEXT: popl %edi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl -; PIC-NEXT: ## -- End function entry: %max = atomicrmw max i64* @sc64, i64 5 acquire ret i64 %max @@ -112,7 +111,6 @@ ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl -; PIC-NEXT: ## -- End function entry: %min = atomicrmw min i64* @sc64, i64 6 acquire ret i64 %min @@ -172,7 +170,6 @@ ; PIC-NEXT: popl %edi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl -; PIC-NEXT: ## -- End function entry: %umax = atomicrmw umax i64* @sc64, i64 7 acquire ret i64 %umax @@ -226,7 +223,6 @@ ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl -; PIC-NEXT: ## -- End function entry: %umin = atomicrmw umin i64* @sc64, i64 8 acquire ret i64 %umin @@ -289,8 +285,6 @@ ; PIC-NEXT: popl %edi 
; PIC-NEXT: popl %ebx ; PIC-NEXT: retl -; PIC-NEXT: ## -- End function -; PIC-NEXT: .zerofill __DATA,__bss,_id,8,3 ## @id entry: %tmp1 = atomicrmw add i64* @id, i64 1 seq_cst %tmp2 = add i64 %tmp1, 1 Index: llvm/test/CodeGen/X86/atomic_mi.ll =================================================================== --- llvm/test/CodeGen/X86/atomic_mi.ll +++ llvm/test/CodeGen/X86/atomic_mi.ll @@ -1805,35 +1805,29 @@ ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: pushl %edi -; X32-NEXT: .cfi_def_cfa_offset 12 ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 16 -; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %edi, -12 +; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: xorl %esi, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%edi) +; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: negl %ebx -; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl (%edi), %eax -; X32-NEXT: movl 4(%edi), %edx +; X32-NEXT: sbbl %edx, %ecx +; X32-NEXT: movl (%esi), %eax +; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB73_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: lock cmpxchg8b (%edi) +; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB73_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: popl %edi ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx ; X32-NEXT: .cfi_def_cfa_offset 4 @@ -2245,11 +2239,11 @@ ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: movl 20(%ebp), %esi +; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: movl 8(%ebp), %edi ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8) ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) Index: llvm/test/CodeGen/X86/avx-cmp.ll =================================================================== --- llvm/test/CodeGen/X86/avx-cmp.ll +++ llvm/test/CodeGen/X86/avx-cmp.ll @@ -48,7 +48,7 @@ ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: testb %bpl, %bpl ; CHECK-NEXT: jne .LBB2_2 -; CHECK-NEXT: # %bb.4: # %for.body33 +; CHECK-NEXT: # %bb.4: # %for.body33.preheader ; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 ; CHECK-NEXT: vucomisd {{\.LCPI.*}}, %xmm0 ; CHECK-NEXT: jne .LBB2_5 Index: llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -1251,17 +1251,17 @@ ; WIN64-LABEL: test_argMultiRet: ; WIN64: # %bb.0: ; WIN64-NEXT: vaddsd __real@{{.*}}(%rip), %xmm1, %xmm1 +; WIN64-NEXT: movl $999, %edx # imm = 0x3E7 ; WIN64-NEXT: movl $4, %eax ; WIN64-NEXT: movb $7, %cl -; WIN64-NEXT: movl $999, %edx # imm = 0x3E7 ; WIN64-NEXT: retq ; ; LINUXOSX64-LABEL: test_argMultiRet: ; LINUXOSX64: # %bb.0: ; LINUXOSX64-NEXT: vaddsd {{.*}}(%rip), %xmm1, %xmm1 +; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7 ; LINUXOSX64-NEXT: movl $4, %eax ; LINUXOSX64-NEXT: movb $7, %cl -; LINUXOSX64-NEXT: movl $999, %edx # imm = 0x3E7 ; LINUXOSX64-NEXT: retq %6 = fadd double %1, 
5.000000e+00 %7 = insertvalue %struct.complex undef, float %0, 0 Index: llvm/test/CodeGen/X86/bitreverse.ll =================================================================== --- llvm/test/CodeGen/X86/bitreverse.ll +++ llvm/test/CodeGen/X86/bitreverse.ll @@ -946,16 +946,16 @@ ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx ; X64-NEXT: bswapq %rbx -; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F ; X64-NEXT: movq %rbx, %r10 +; X64-NEXT: movabsq $1085102592571150095, %r13 # imm = 0xF0F0F0F0F0F0F0F ; X64-NEXT: andq %r13, %r10 ; X64-NEXT: shlq $4, %r10 ; X64-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0 ; X64-NEXT: andq %rax, %rbx ; X64-NEXT: shrq $4, %rbx ; X64-NEXT: orq %r10, %rbx -; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333 ; X64-NEXT: movq %rbx, %r10 +; X64-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333 ; X64-NEXT: andq %r11, %r10 ; X64-NEXT: movabsq $-3689348814741910324, %r14 # imm = 0xCCCCCCCCCCCCCCCC ; X64-NEXT: andq %r14, %rbx @@ -980,8 +980,8 @@ ; X64-NEXT: andq %r14, %rbp ; X64-NEXT: shrq $2, %rbp ; X64-NEXT: leaq (%rbp,%rdi,4), %rbp -; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555 ; X64-NEXT: movq %rbp, %r10 +; X64-NEXT: movabsq $6148914691236517205, %rbx # imm = 0x5555555555555555 ; X64-NEXT: andq %rbx, %r10 ; X64-NEXT: movabsq $-6148914691236517206, %rdi # imm = 0xAAAAAAAAAAAAAAAA ; X64-NEXT: andq %rdi, %rbp Index: llvm/test/CodeGen/X86/bmi.ll =================================================================== --- llvm/test/CodeGen/X86/bmi.ll +++ llvm/test/CodeGen/X86/bmi.ll @@ -502,8 +502,8 @@ ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: negl %eax ; X86-NEXT: sbbl %esi, %edx ; X86-NEXT: andl %esi, %edx Index: llvm/test/CodeGen/X86/bss_pagealigned.ll =================================================================== --- llvm/test/CodeGen/X86/bss_pagealigned.ll +++ llvm/test/CodeGen/X86/bss_pagealigned.ll @@ -7,9 +7,9 @@ define void @unxlate_dev_mem_ptr(i64 %phis, i8* %addr) nounwind { %pte.addr.i = alloca %struct.kmem_cache_order_objects* %call8 = call i8* @memset(i8* bitcast ([512 x %struct.kmem_cache_order_objects]* @bm_pte to i8*), i32 0, i64 4096) -; CHECK: movq $bm_pte, %rdi +; CHECK: movl $4096, %edx +; CHECK-NEXT: movq $bm_pte, %rdi ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: movl $4096, %edx ; CHECK-NEXT: callq memset ret void } Index: llvm/test/CodeGen/X86/bug26810.ll =================================================================== --- llvm/test/CodeGen/X86/bug26810.ll +++ llvm/test/CodeGen/X86/bug26810.ll @@ -21,11 +21,8 @@ ; CHECK-LABEL: name: loop ; CHECK: bb.2.for.body: ; CHECK: SUBPDrr -; CHECK-NEXT: MOVAPSmr ; CHECK-NEXT: MULPDrm -; CHECK-NEXT: MOVAPSrm ; CHECK-NEXT: ADDPDrr -; CHECK-NEXT: MOVAPSmr ; CHECK-NEXT: ADD32ri8 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" Index: llvm/test/CodeGen/X86/bypass-slow-division-32.ll =================================================================== --- llvm/test/CodeGen/X86/bypass-slow-division-32.ll +++ llvm/test/CodeGen/X86/bypass-slow-division-32.ll @@ -97,8 +97,8 @@ ; CHECK-NEXT: testl $-256, %edi ; CHECK-NEXT: je .LBB3_4 ; CHECK-NEXT: .LBB3_5: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ebx ; CHECK-NEXT: jmp 
.LBB3_6 ; CHECK-NEXT: .LBB3_1: Index: llvm/test/CodeGen/X86/bypass-slow-division-64.ll =================================================================== --- llvm/test/CodeGen/X86/bypass-slow-division-64.ll +++ llvm/test/CodeGen/X86/bypass-slow-division-64.ll @@ -17,8 +17,8 @@ ; CHECK-NEXT: idivq %rsi ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-NEXT: retq @@ -40,8 +40,8 @@ ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_1: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %esi ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq @@ -63,8 +63,8 @@ ; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB2_1: -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %esi ; CHECK-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-NEXT: # kill: def $eax killed $eax def $rax Index: llvm/test/CodeGen/X86/cast-vsel.ll =================================================================== --- llvm/test/CodeGen/X86/cast-vsel.ll +++ llvm/test/CodeGen/X86/cast-vsel.ll @@ -277,8 +277,8 @@ define void @example25() nounwind { ; SSE2-LABEL: example25: ; SSE2: # %bb.0: # %vector.ph -; SSE2-NEXT: movq $-4096, %rax # imm = 0xF000 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1] +; SSE2-NEXT: movq $-4096, %rax # imm = 0xF000 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB5_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -307,8 +307,8 @@ ; ; SSE41-LABEL: example25: ; SSE41: # %bb.0: # %vector.ph -; SSE41-NEXT: movq $-4096, %rax # imm = 0xF000 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [1,1,1,1] +; SSE41-NEXT: movq $-4096, %rax # imm = 0xF000 ; SSE41-NEXT: .p2align 4, 0x90 ; SSE41-NEXT: .LBB5_1: # %vector.body ; SSE41-NEXT: # =>This Inner Loop Header: Depth=1 @@ -336,8 +336,8 @@ ; ; AVX1-LABEL: example25: ; AVX1: # %bb.0: # %vector.ph -; AVX1-NEXT: movq $-4096, %rax # imm = 0xF000 ; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1] +; AVX1-NEXT: movq $-4096, %rax # imm = 0xF000 ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB5_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -474,7 +474,6 @@ ; AVX1-NEXT: vmovd %esi, %xmm1 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; AVX1-NEXT: movq $-4096, %rax # imm = 0xF000 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm2 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 @@ -483,6 +482,7 @@ ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; AVX1-NEXT: vpmovsxwd %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: movq $-4096, %rax # imm = 0xF000 ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB6_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -502,9 +502,9 @@ ; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: vmovd %esi, %xmm1 ; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: movq $-4096, %rax # imm = 0xF000 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 ; AVX2-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX2-NEXT: movq $-4096, %rax # imm = 0xF000 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB6_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/X86/clear-highbits.ll 
=================================================================== --- llvm/test/CodeGen/X86/clear-highbits.ll +++ llvm/test/CodeGen/X86/clear-highbits.ll @@ -514,9 +514,9 @@ ; X86-NOBMI2-LABEL: clear_highbits64_c0: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB13_2 @@ -567,9 +567,9 @@ ; X86-NOBMI2-LABEL: clear_highbits64_c1_indexzext: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB14_2 @@ -624,9 +624,9 @@ ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB15_2 @@ -684,9 +684,9 @@ ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB16_2 @@ -744,9 +744,9 @@ ; X86-NOBMI2-LABEL: clear_highbits64_c4_commutative: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shrl %cl, %edx +; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB17_2 @@ -880,24 +880,24 @@ ; X86-NOBMI2-NEXT: pushl %eax ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: shrl %cl, %esi ; X86-NOBMI2-NEXT: movl $-1, %edi -; X86-NOBMI2-NEXT: shrl %cl, %edi -; X86-NOBMI2-NEXT: shrdl %cl, %esi, %esi +; X86-NOBMI2-NEXT: shrdl %cl, %edi, %edi ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB19_2 ; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edi, %esi -; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi ; X86-NOBMI2-NEXT: .LBB19_2: ; X86-NOBMI2-NEXT: subl $8, %esp -; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: pushl %esi +; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: calll use64 ; X86-NOBMI2-NEXT: addl $16, %esp -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NOBMI2-NEXT: movl %esi, %eax -; X86-NOBMI2-NEXT: movl %edi, %edx +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %edi, %eax +; X86-NOBMI2-NEXT: movl %esi, %edx ; X86-NOBMI2-NEXT: addl $4, %esp ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: popl %edi Index: llvm/test/CodeGen/X86/clear-lowbits.ll =================================================================== --- llvm/test/CodeGen/X86/clear-lowbits.ll +++ llvm/test/CodeGen/X86/clear-lowbits.ll @@ -499,9 +499,9 @@ ; X86-NOBMI2-LABEL: clear_lowbits64_c0: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), 
%cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB13_2 @@ -552,9 +552,9 @@ ; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB14_2 @@ -609,9 +609,9 @@ ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB15_2 @@ -669,9 +669,9 @@ ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB16_2 @@ -729,9 +729,9 @@ ; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB17_2 @@ -1328,9 +1328,9 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl $64, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB31_2 @@ -1386,9 +1386,9 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movb $64, %cl ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB32_2 @@ -1448,9 +1448,9 @@ ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movl $64, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB33_2 @@ -1513,9 +1513,9 @@ ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: movb $64, %cl ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB34_2 @@ -1578,9 +1578,9 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl $64, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax +; 
X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB35_2 @@ -1718,24 +1718,24 @@ ; X86-NOBMI2-NEXT: pushl %eax ; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %esi +; X86-NOBMI2-NEXT: shll %cl, %esi ; X86-NOBMI2-NEXT: movl $-1, %edi -; X86-NOBMI2-NEXT: shll %cl, %edi -; X86-NOBMI2-NEXT: shldl %cl, %esi, %esi +; X86-NOBMI2-NEXT: shldl %cl, %edi, %edi ; X86-NOBMI2-NEXT: testb $32, %cl ; X86-NOBMI2-NEXT: je .LBB37_2 ; X86-NOBMI2-NEXT: # %bb.1: -; X86-NOBMI2-NEXT: movl %edi, %esi -; X86-NOBMI2-NEXT: xorl %edi, %edi +; X86-NOBMI2-NEXT: movl %esi, %edi +; X86-NOBMI2-NEXT: xorl %esi, %esi ; X86-NOBMI2-NEXT: .LBB37_2: ; X86-NOBMI2-NEXT: subl $8, %esp -; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: pushl %edi +; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: calll use64 ; X86-NOBMI2-NEXT: addl $16, %esp -; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %edi -; X86-NOBMI2-NEXT: movl %edi, %eax -; X86-NOBMI2-NEXT: movl %esi, %edx +; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI2-NEXT: movl %esi, %eax +; X86-NOBMI2-NEXT: movl %edi, %edx ; X86-NOBMI2-NEXT: addl $4, %esp ; X86-NOBMI2-NEXT: popl %esi ; X86-NOBMI2-NEXT: popl %edi Index: llvm/test/CodeGen/X86/clz.ll =================================================================== --- llvm/test/CodeGen/X86/clz.ll +++ llvm/test/CodeGen/X86/clz.ll @@ -289,7 +289,7 @@ ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl ; X32-NEXT: .LBB8_1: -; X32-NEXT: movb $8, %al +; X32-NEXT: movb $8, %al ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl ; @@ -304,7 +304,7 @@ ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; X64-NEXT: .LBB8_1: -; X64-NEXT: movb $8, %al +; X64-NEXT: movb $8, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; @@ -340,7 +340,7 @@ ; X32-NEXT: # kill: def $ax killed $ax killed $eax ; X32-NEXT: retl ; X32-NEXT: .LBB9_1: -; X32-NEXT: movw $16, %ax +; X32-NEXT: movw $16, %ax ; X32-NEXT: # kill: def $ax killed $ax killed $eax ; X32-NEXT: retl ; @@ -383,7 +383,7 @@ ; X32-NEXT: xorl $31, %eax ; X32-NEXT: retl ; X32-NEXT: .LBB10_1: -; X32-NEXT: movl $32, %eax +; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; ; X64-LABEL: ctlz_i32_zero_test: @@ -482,7 +482,7 @@ ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl -; X32-NEXT: .LBB12_1 +; X32-NEXT: .LBB12_1: ; X32-NEXT: movb $8, %al ; X32-NEXT: # kill: def $al killed $al killed $eax ; X32-NEXT: retl @@ -530,7 +530,7 @@ ; X32-NEXT: # %bb.2: # %cond.false ; X32-NEXT: bsfw %ax, %ax ; X32-NEXT: retl -; X32-NEXT: .LBB13_1 +; X32-NEXT: .LBB13_1: ; X32-NEXT: movw $16, %ax ; X32-NEXT: retl ; @@ -568,7 +568,7 @@ ; X32-NEXT: # %bb.2: # %cond.false ; X32-NEXT: bsfl %eax, %eax ; X32-NEXT: retl -; X32-NEXT: .LBB14_1 +; X32-NEXT: .LBB14_1: ; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; @@ -667,7 +667,7 @@ ; X32-NEXT: bsrl %eax, %eax ; X32-NEXT: xorl $31, %eax ; X32-NEXT: retl -; X32-NEXT: .LBB16_1 +; X32-NEXT: .LBB16_1: ; X32-NEXT: movl $32, %eax ; X32-NEXT: retl ; Index: llvm/test/CodeGen/X86/cmov.ll =================================================================== --- llvm/test/CodeGen/X86/cmov.ll +++ llvm/test/CodeGen/X86/cmov.ll @@ -105,8 +105,8 @@ ; CHECK-NEXT: jne .LBB3_7 ; CHECK-NEXT: # %bb.6: # %bb.i.i ; CHECK-NEXT: movb {{.*}}(%rip), %cl -; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorl %ebx, %ebx ; 
CHECK-NEXT: .LBB3_7: # %func_1.exit ; CHECK-NEXT: movb %cl, {{.*}}(%rip) ; CHECK-NEXT: movzbl %cl, %esi Index: llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll =================================================================== --- llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll +++ llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll @@ -87,12 +87,12 @@ ; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rcx, %rbx ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: sete %sil -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/cmpxchg16b.ll =================================================================== --- llvm/test/CodeGen/X86/cmpxchg16b.ll +++ llvm/test/CodeGen/X86/cmpxchg16b.ll @@ -6,10 +6,10 @@ ; CHECK-LABEL: t1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movl $1, %ebx ; CHECK-NEXT: lock cmpxchg16b (%rdi) ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/code-model-elf-memset.ll =================================================================== --- llvm/test/CodeGen/X86/code-model-elf-memset.ll +++ llvm/test/CodeGen/X86/code-model-elf-memset.ll @@ -30,8 +30,8 @@ ; SMALL-PIC-NEXT: .cfi_def_cfa_offset 432 ; SMALL-PIC-NEXT: movl $0, {{[0-9]+}}(%rsp) ; SMALL-PIC-NEXT: leaq {{[0-9]+}}(%rsp), %rdi -; SMALL-PIC-NEXT: xorl %esi, %esi ; SMALL-PIC-NEXT: movl $400, %edx # imm = 0x190 +; SMALL-PIC-NEXT: xorl %esi, %esi ; SMALL-PIC-NEXT: callq memset@PLT ; SMALL-PIC-NEXT: xorl %eax, %eax ; SMALL-PIC-NEXT: addq $424, %rsp # imm = 0x1A8 @@ -44,8 +44,8 @@ ; MEDIUM-PIC-NEXT: .cfi_def_cfa_offset 432 ; MEDIUM-PIC-NEXT: movl $0, {{[0-9]+}}(%rsp) ; MEDIUM-PIC-NEXT: leaq {{[0-9]+}}(%rsp), %rdi -; MEDIUM-PIC-NEXT: xorl %esi, %esi ; MEDIUM-PIC-NEXT: movl $400, %edx # imm = 0x190 +; MEDIUM-PIC-NEXT: xorl %esi, %esi ; MEDIUM-PIC-NEXT: callq memset@PLT ; MEDIUM-PIC-NEXT: xorl %eax, %eax ; MEDIUM-PIC-NEXT: addq $424, %rsp # imm = 0x1A8 @@ -62,9 +62,9 @@ ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: movl $0, {{[0-9]+}}(%rsp) ; LARGE-PIC-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; LARGE-PIC-NEXT: movl $400, %edx # imm = 0x190 ; LARGE-PIC-NEXT: movabsq $memset@GOT, %rcx ; LARGE-PIC-NEXT: xorl %esi, %esi -; LARGE-PIC-NEXT: movl $400, %edx # imm = 0x190 ; LARGE-PIC-NEXT: callq *(%rax,%rcx) ; LARGE-PIC-NEXT: xorl %eax, %eax ; LARGE-PIC-NEXT: addq $424, %rsp # imm = 0x1A8 Index: llvm/test/CodeGen/X86/code-model-elf.ll =================================================================== --- llvm/test/CodeGen/X86/code-model-elf.ll +++ llvm/test/CodeGen/X86/code-model-elf.ll @@ -56,12 +56,12 @@ ; ; SMALL-PIC-LABEL: lea_static_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: leaq static_data(%rip), %rax +; SMALL-PIC-NEXT: leaq {{.*}}(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_static_data: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx +; MEDIUM-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; MEDIUM-PIC-NEXT: movabsq $static_data@GOTOFF, %rax ; MEDIUM-PIC-NEXT: addq %rcx, %rax ; MEDIUM-PIC-NEXT: retq @@ -69,7 +69,7 @@ ; LARGE-PIC-LABEL: lea_static_data: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp0: -; LARGE-PIC-NEXT: leaq .Ltmp0(%rip), %rcx +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), 
%rcx ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp0, %rax ; LARGE-PIC-NEXT: addq %rax, %rcx ; LARGE-PIC-NEXT: movabsq $static_data@GOTOFF, %rax @@ -96,12 +96,12 @@ ; ; SMALL-PIC-LABEL: lea_global_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: leaq global_data(%rip), %rax +; SMALL-PIC-NEXT: leaq {{.*}}(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_global_data: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx +; MEDIUM-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; MEDIUM-PIC-NEXT: movabsq $global_data@GOTOFF, %rax ; MEDIUM-PIC-NEXT: addq %rcx, %rax ; MEDIUM-PIC-NEXT: retq @@ -109,7 +109,7 @@ ; LARGE-PIC-LABEL: lea_global_data: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp1: -; LARGE-PIC-NEXT: leaq .Ltmp1(%rip), %rcx +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp1, %rax ; LARGE-PIC-NEXT: addq %rax, %rcx ; LARGE-PIC-NEXT: movabsq $global_data@GOTOFF, %rax @@ -136,18 +136,18 @@ ; ; SMALL-PIC-LABEL: lea_extern_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: movq extern_data@GOTPCREL(%rip), %rax +; SMALL-PIC-NEXT: movq extern_data@{{.*}}(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_extern_data: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: movq extern_data@GOTPCREL(%rip), %rax +; MEDIUM-PIC-NEXT: movq extern_data@{{.*}}(%rip), %rax ; MEDIUM-PIC-NEXT: retq ; ; LARGE-PIC-LABEL: lea_extern_data: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp2: -; LARGE-PIC-NEXT: leaq .Ltmp2(%rip), %rax +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp2, %rcx ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: movabsq $extern_data@GOT, %rcx @@ -159,7 +159,7 @@ define dso_local i32 @load_global_data() #0 { ; SMALL-STATIC-LABEL: load_global_data: ; SMALL-STATIC: # %bb.0: -; SMALL-STATIC-NEXT: movl global_data+8(%rip), %eax +; SMALL-STATIC-NEXT: movl global_data+{{.*}}(%rip), %eax ; SMALL-STATIC-NEXT: retq ; ; MEDIUM-STATIC-LABEL: load_global_data: @@ -176,12 +176,12 @@ ; ; SMALL-PIC-LABEL: load_global_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: movl global_data+8(%rip), %eax +; SMALL-PIC-NEXT: movl global_data+{{.*}}(%rip), %eax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: load_global_data: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax +; MEDIUM-PIC-NEXT: leaq {{.*}}(%rip), %rax ; MEDIUM-PIC-NEXT: movabsq $global_data@GOTOFF, %rcx ; MEDIUM-PIC-NEXT: movl 8(%rax,%rcx), %eax ; MEDIUM-PIC-NEXT: retq @@ -189,7 +189,7 @@ ; LARGE-PIC-LABEL: load_global_data: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp3: -; LARGE-PIC-NEXT: leaq .Ltmp3(%rip), %rax +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp3, %rcx ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: movabsq $global_data@GOTOFF, %rcx @@ -202,7 +202,7 @@ define dso_local i32 @load_extern_data() #0 { ; SMALL-STATIC-LABEL: load_extern_data: ; SMALL-STATIC: # %bb.0: -; SMALL-STATIC-NEXT: movl extern_data+8(%rip), %eax +; SMALL-STATIC-NEXT: movl extern_data+{{.*}}(%rip), %eax ; SMALL-STATIC-NEXT: retq ; ; MEDIUM-STATIC-LABEL: load_extern_data: @@ -219,20 +219,20 @@ ; ; SMALL-PIC-LABEL: load_extern_data: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: movq extern_data@GOTPCREL(%rip), %rax +; SMALL-PIC-NEXT: movq extern_data@{{.*}}(%rip), %rax ; SMALL-PIC-NEXT: movl 8(%rax), %eax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: load_extern_data: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: movq extern_data@GOTPCREL(%rip), %rax +; MEDIUM-PIC-NEXT: movq 
extern_data@{{.*}}(%rip), %rax ; MEDIUM-PIC-NEXT: movl 8(%rax), %eax ; MEDIUM-PIC-NEXT: retq ; ; LARGE-PIC-LABEL: load_extern_data: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp4: -; LARGE-PIC-NEXT: leaq .Ltmp4(%rip), %rax +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp4, %rcx ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: movabsq $extern_data@GOT, %rcx @@ -277,7 +277,7 @@ ; ; SMALL-PIC-LABEL: lea_static_fn: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: leaq static_fn(%rip), %rax +; SMALL-PIC-NEXT: leaq {{.*}}(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_static_fn: @@ -288,7 +288,7 @@ ; LARGE-PIC-LABEL: lea_static_fn: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp5: -; LARGE-PIC-NEXT: leaq .Ltmp5(%rip), %rcx +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp5, %rax ; LARGE-PIC-NEXT: addq %rax, %rcx ; LARGE-PIC-NEXT: movabsq $static_fn@GOTOFF, %rax @@ -315,7 +315,7 @@ ; ; SMALL-PIC-LABEL: lea_global_fn: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: leaq global_fn(%rip), %rax +; SMALL-PIC-NEXT: leaq {{.*}}(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_global_fn: @@ -326,7 +326,7 @@ ; LARGE-PIC-LABEL: lea_global_fn: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp6: -; LARGE-PIC-NEXT: leaq .Ltmp6(%rip), %rcx +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp6, %rax ; LARGE-PIC-NEXT: addq %rax, %rcx ; LARGE-PIC-NEXT: movabsq $global_fn@GOTOFF, %rax @@ -353,18 +353,18 @@ ; ; SMALL-PIC-LABEL: lea_extern_fn: ; SMALL-PIC: # %bb.0: -; SMALL-PIC-NEXT: movq extern_fn@GOTPCREL(%rip), %rax +; SMALL-PIC-NEXT: movq extern_fn@{{.*}}(%rip), %rax ; SMALL-PIC-NEXT: retq ; ; MEDIUM-PIC-LABEL: lea_extern_fn: ; MEDIUM-PIC: # %bb.0: -; MEDIUM-PIC-NEXT: movq extern_fn@GOTPCREL(%rip), %rax +; MEDIUM-PIC-NEXT: movq extern_fn@{{.*}}(%rip), %rax ; MEDIUM-PIC-NEXT: retq ; ; LARGE-PIC-LABEL: lea_extern_fn: ; LARGE-PIC: # %bb.0: ; LARGE-PIC-NEXT: .Ltmp7: -; LARGE-PIC-NEXT: leaq .Ltmp7(%rip), %rax +; LARGE-PIC-NEXT: leaq {{.*}}(%rip), %rax ; LARGE-PIC-NEXT: movabsq $_GLOBAL_OFFSET_TABLE_-.Ltmp7, %rcx ; LARGE-PIC-NEXT: addq %rcx, %rax ; LARGE-PIC-NEXT: movabsq $extern_fn@GOT, %rcx Index: llvm/test/CodeGen/X86/combine-srem.ll =================================================================== --- llvm/test/CodeGen/X86/combine-srem.ll +++ llvm/test/CodeGen/X86/combine-srem.ll @@ -443,8 +443,8 @@ ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %esi ; CHECK-NEXT: movl %edx, %edi -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %esi ; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/critical-edge-split-2.ll =================================================================== --- llvm/test/CodeGen/X86/critical-edge-split-2.ll +++ llvm/test/CodeGen/X86/critical-edge-split-2.ll @@ -11,8 +11,8 @@ define i16 @test1(i1 zeroext %C, i8** nocapture %argv) nounwind ssp { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movw $1, %ax ; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: movw $1, %ax ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %cond.false.i ; CHECK-NEXT: movl $g_4, %eax Index: llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll =================================================================== --- llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll +++ llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll @@ -7,9 +7,9 @@ ; CHECK: callq 
_Z3fooPcjPKc ; CHECK: callq _Z3fooPcjPKc ; CHECK: movq %rsp, %rdi -; CHECK: movl $4, %esi ; CHECK: testl {{%[a-z]+}}, {{%[a-z]+}} ; CHECK: je .LBB0_4 +; CHECK: movl $4, %esi ; Regenerate test with this command: ; clang++ -emit-llvm -S -O2 -g Index: llvm/test/CodeGen/X86/divrem.ll =================================================================== --- llvm/test/CodeGen/X86/divrem.ll +++ llvm/test/CodeGen/X86/divrem.ll @@ -245,8 +245,8 @@ ; X64: # %bb.0: ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: movl %edi, %eax -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: divw %si ; X64-NEXT: movw %ax, (%r8) ; X64-NEXT: movw %dx, (%rcx) Index: llvm/test/CodeGen/X86/extract-bits.ll =================================================================== --- llvm/test/CodeGen/X86/extract-bits.ll +++ llvm/test/CodeGen/X86/extract-bits.ll @@ -505,8 +505,8 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB6_2: ; X86-NOBMI-NEXT: movl $1, %eax -; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %ch @@ -541,8 +541,8 @@ ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB6_2: ; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx ; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %ch @@ -577,8 +577,8 @@ ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB6_2: ; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx ; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %bl @@ -648,8 +648,8 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB7_2: ; X86-NOBMI-NEXT: movl $1, %eax -; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %ch @@ -684,8 +684,8 @@ ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB7_2: ; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx ; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %ch @@ -720,8 +720,8 @@ ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB7_2: ; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx ; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %bl @@ -797,8 +797,8 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB8_2: ; X86-NOBMI-NEXT: movl $1, %eax -; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %ch @@ -834,8 +834,8 @@ ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB8_2: ; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: 
shldl %cl, %eax, %edx ; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %ch @@ -871,8 +871,8 @@ ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB8_2: ; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx ; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %bl @@ -946,8 +946,8 @@ ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB9_2: ; X86-NOBMI-NEXT: movl $1, %eax -; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: testb $32, %ch @@ -983,8 +983,8 @@ ; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB9_2: ; X86-BMI1NOTBM-NEXT: movl $1, %eax -; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: movb %ch, %cl +; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %edx ; X86-BMI1NOTBM-NEXT: shll %cl, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %ch @@ -1020,8 +1020,8 @@ ; X86-BMI1BMI2-NEXT: xorl %edi, %edi ; X86-BMI1BMI2-NEXT: .LBB9_2: ; X86-BMI1BMI2-NEXT: movl $1, %eax -; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx +; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: shldl %cl, %eax, %edx ; X86-BMI1BMI2-NEXT: shlxl %ebx, %eax, %eax ; X86-BMI1BMI2-NEXT: testb $32, %bl @@ -1099,8 +1099,8 @@ ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB10_2: ; X86-NOBMI-NEXT: movl $1, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: shldl %cl, %esi, %edi ; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %ch @@ -1135,8 +1135,8 @@ ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB10_2: ; X86-BMI1NOTBM-NEXT: movl $1, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: movb %ch, %cl +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi ; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %ch @@ -1171,8 +1171,8 @@ ; X86-BMI1BMI2-NEXT: xorl %edx, %edx ; X86-BMI1BMI2-NEXT: .LBB10_2: ; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: movl %ebx, %ecx +; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi ; X86-BMI1BMI2-NEXT: shlxl %ebx, %edi, %ecx ; X86-BMI1BMI2-NEXT: testb $32, %bl @@ -1246,8 +1246,8 @@ ; X86-NOBMI-NEXT: xorl %ebp, %ebp ; X86-NOBMI-NEXT: .LBB11_2: ; X86-NOBMI-NEXT: movl $1, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: movl %edx, %ecx +; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: shldl %cl, %esi, %edi ; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: testb $32, %dl @@ -1296,8 +1296,8 @@ ; X86-BMI1NOTBM-NEXT: xorl %ebp, %ebp ; X86-BMI1NOTBM-NEXT: .LBB11_2: ; X86-BMI1NOTBM-NEXT: movl $1, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: movl %edx, %ecx +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: shldl %cl, %esi, %edi ; X86-BMI1NOTBM-NEXT: shll %cl, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %dl @@ -1345,8 +1345,8 @@ ; X86-BMI1BMI2-NEXT: xorl %ebp, %ebp ; X86-BMI1BMI2-NEXT: .LBB11_2: ; X86-BMI1BMI2-NEXT: movl $1, %edi -; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: movl %edx, %ecx +; X86-BMI1BMI2-NEXT: xorl %esi, %esi ; X86-BMI1BMI2-NEXT: shldl %cl, %edi, %esi ; X86-BMI1BMI2-NEXT: shlxl %edx, %edi, %edi ; 
X86-BMI1BMI2-NEXT: testb $32, %dl @@ -1898,10 +1898,10 @@ ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB18_2: -; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch ; X86-NOBMI-NEXT: je .LBB18_4 @@ -1935,19 +1935,19 @@ ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB18_2: +; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB18_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %edi, %ebx +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB18_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -2038,10 +2038,10 @@ ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB19_2: -; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch ; X86-NOBMI-NEXT: je .LBB19_4 @@ -2075,19 +2075,19 @@ ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB19_2: +; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB19_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %edi, %ebx +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB19_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -2184,10 +2184,10 @@ ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB20_2: -; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch ; X86-NOBMI-NEXT: je .LBB20_4 @@ -2222,19 +2222,19 @@ ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB20_2: +; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %edi 
+; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB20_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %edi, %ebx +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB20_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -2330,10 +2330,10 @@ ; X86-NOBMI-NEXT: movl %edi, %esi ; X86-NOBMI-NEXT: xorl %edi, %edi ; X86-NOBMI-NEXT: .LBB21_2: -; X86-NOBMI-NEXT: movl $-1, %edx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %ch ; X86-NOBMI-NEXT: je .LBB21_4 @@ -2368,19 +2368,19 @@ ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB21_2: +; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB21_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %edi, %ebx +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB21_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, %edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -2480,10 +2480,10 @@ ; X86-NOBMI-NEXT: movl %edx, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: .LBB22_2: -; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movb %ch, %cl +; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: shll %cl, %esi +; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shldl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %ch ; X86-NOBMI-NEXT: je .LBB22_4 @@ -2517,19 +2517,19 @@ ; X86-BMI1NOTBM-NEXT: movl %edx, %esi ; X86-BMI1NOTBM-NEXT: xorl %edx, %edx ; X86-BMI1NOTBM-NEXT: .LBB22_2: +; X86-BMI1NOTBM-NEXT: movl %eax, %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %edi +; X86-BMI1NOTBM-NEXT: shll %cl, %edi ; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl %eax, %ecx -; X86-BMI1NOTBM-NEXT: shll %cl, %ebx -; X86-BMI1NOTBM-NEXT: shldl %cl, %edi, %edi +; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %al ; X86-BMI1NOTBM-NEXT: je .LBB22_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %ebx, %edi -; X86-BMI1NOTBM-NEXT: xorl %ebx, %ebx +; X86-BMI1NOTBM-NEXT: movl %edi, %ebx +; X86-BMI1NOTBM-NEXT: xorl %edi, %edi ; X86-BMI1NOTBM-NEXT: .LBB22_4: -; X86-BMI1NOTBM-NEXT: andnl %edx, %edi, %edx -; X86-BMI1NOTBM-NEXT: andnl %esi, %ebx, %eax +; X86-BMI1NOTBM-NEXT: andnl %edx, %ebx, 
%edx +; X86-BMI1NOTBM-NEXT: andnl %esi, %edi, %eax ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi ; X86-BMI1NOTBM-NEXT: popl %ebx @@ -2624,28 +2624,28 @@ ; X86-NOBMI-NEXT: movl %ebp, %ebx ; X86-NOBMI-NEXT: xorl %ebp, %ebp ; X86-NOBMI-NEXT: .LBB23_2: +; X86-NOBMI-NEXT: movl %edx, %ecx ; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shll %cl, %esi ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: movl %edx, %ecx -; X86-NOBMI-NEXT: shll %cl, %edi -; X86-NOBMI-NEXT: shldl %cl, %esi, %esi +; X86-NOBMI-NEXT: shldl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %dl ; X86-NOBMI-NEXT: je .LBB23_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB23_4: -; X86-NOBMI-NEXT: notl %esi -; X86-NOBMI-NEXT: andl %ebp, %esi ; X86-NOBMI-NEXT: notl %edi -; X86-NOBMI-NEXT: andl %ebx, %edi +; X86-NOBMI-NEXT: andl %ebp, %edi +; X86-NOBMI-NEXT: notl %esi +; X86-NOBMI-NEXT: andl %ebx, %esi ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: movl %edi, %eax -; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: movl %edi, %edx ; X86-NOBMI-NEXT: addl $12, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -2674,10 +2674,10 @@ ; X86-BMI1NOTBM-NEXT: movl %esi, %edi ; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB23_2: -; X86-BMI1NOTBM-NEXT: movl $-1, %ebx -; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: movl %edx, %ecx +; X86-BMI1NOTBM-NEXT: movl $-1, %ebp ; X86-BMI1NOTBM-NEXT: shll %cl, %ebp +; X86-BMI1NOTBM-NEXT: movl $-1, %ebx ; X86-BMI1NOTBM-NEXT: shldl %cl, %ebx, %ebx ; X86-BMI1NOTBM-NEXT: testb $32, %dl ; X86-BMI1NOTBM-NEXT: je .LBB23_4 @@ -3253,9 +3253,9 @@ ; X86-NOBMI-NEXT: .LBB30_2: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB30_4 @@ -3287,9 +3287,9 @@ ; X86-BMI1NOTBM-NEXT: .LBB30_2: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB30_4 @@ -3387,9 +3387,9 @@ ; X86-NOBMI-NEXT: .LBB31_2: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB31_4 @@ -3421,9 +3421,9 @@ ; X86-BMI1NOTBM-NEXT: .LBB31_2: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB31_4 @@ -3527,9 +3527,9 @@ ; X86-NOBMI-NEXT: .LBB32_2: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl 
{{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB32_4 @@ -3562,9 +3562,9 @@ ; X86-BMI1NOTBM-NEXT: .LBB32_2: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB32_4 @@ -3666,9 +3666,9 @@ ; X86-NOBMI-NEXT: .LBB33_2: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB33_4 @@ -3701,9 +3701,9 @@ ; X86-BMI1NOTBM-NEXT: .LBB33_2: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB33_4 @@ -3809,9 +3809,9 @@ ; X86-NOBMI-NEXT: .LBB34_2: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shrl %cl, %edi +; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB34_4 @@ -3843,9 +3843,9 @@ ; X86-BMI1NOTBM-NEXT: .LBB34_2: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi ; X86-BMI1NOTBM-NEXT: shrl %cl, %edi +; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB34_4 @@ -3949,24 +3949,24 @@ ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %esi +; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $-1, %edi -; X86-NOBMI-NEXT: shrl %cl, %edi -; X86-NOBMI-NEXT: shrdl %cl, %esi, %esi +; X86-NOBMI-NEXT: shrdl %cl, %edi, %edi ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB35_4 ; X86-NOBMI-NEXT: # %bb.3: -; X86-NOBMI-NEXT: movl %edi, %esi -; X86-NOBMI-NEXT: xorl %edi, %edi +; X86-NOBMI-NEXT: movl %esi, %edi +; X86-NOBMI-NEXT: xorl %esi, %esi ; X86-NOBMI-NEXT: .LBB35_4: -; X86-NOBMI-NEXT: andl %ebx, %edi -; X86-NOBMI-NEXT: andl %edx, %esi +; X86-NOBMI-NEXT: andl %ebx, %esi +; X86-NOBMI-NEXT: andl %edx, %edi ; X86-NOBMI-NEXT: subl $8, %esp ; X86-NOBMI-NEXT: pushl %ebp ; X86-NOBMI-NEXT: pushl %eax ; X86-NOBMI-NEXT: calll use64 ; X86-NOBMI-NEXT: addl $16, %esp -; X86-NOBMI-NEXT: movl %esi, %eax -; X86-NOBMI-NEXT: movl %edi, %edx +; X86-NOBMI-NEXT: movl %edi, %eax +; X86-NOBMI-NEXT: movl %esi, %edx ; X86-NOBMI-NEXT: addl $12, %esp ; X86-NOBMI-NEXT: popl %esi ; X86-NOBMI-NEXT: popl %edi @@ -3998,24 +3998,24 @@ ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI1NOTBM-NEXT: movl $-1, %esi +; X86-BMI1NOTBM-NEXT: shrl %cl, %esi ; X86-BMI1NOTBM-NEXT: movl $-1, %edi -; X86-BMI1NOTBM-NEXT: 
shrl %cl, %edi -; X86-BMI1NOTBM-NEXT: shrdl %cl, %esi, %esi +; X86-BMI1NOTBM-NEXT: shrdl %cl, %edi, %edi ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB35_4 ; X86-BMI1NOTBM-NEXT: # %bb.3: -; X86-BMI1NOTBM-NEXT: movl %edi, %esi -; X86-BMI1NOTBM-NEXT: xorl %edi, %edi +; X86-BMI1NOTBM-NEXT: movl %esi, %edi +; X86-BMI1NOTBM-NEXT: xorl %esi, %esi ; X86-BMI1NOTBM-NEXT: .LBB35_4: -; X86-BMI1NOTBM-NEXT: andl %ebx, %edi -; X86-BMI1NOTBM-NEXT: andl %edx, %esi +; X86-BMI1NOTBM-NEXT: andl %ebx, %esi +; X86-BMI1NOTBM-NEXT: andl %edx, %edi ; X86-BMI1NOTBM-NEXT: subl $8, %esp ; X86-BMI1NOTBM-NEXT: pushl %ebp ; X86-BMI1NOTBM-NEXT: pushl %eax ; X86-BMI1NOTBM-NEXT: calll use64 ; X86-BMI1NOTBM-NEXT: addl $16, %esp -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: movl %edi, %edx +; X86-BMI1NOTBM-NEXT: movl %edi, %eax +; X86-BMI1NOTBM-NEXT: movl %esi, %edx ; X86-BMI1NOTBM-NEXT: addl $12, %esp ; X86-BMI1NOTBM-NEXT: popl %esi ; X86-BMI1NOTBM-NEXT: popl %edi Index: llvm/test/CodeGen/X86/extract-lowbits.ll =================================================================== --- llvm/test/CodeGen/X86/extract-lowbits.ll +++ llvm/test/CodeGen/X86/extract-lowbits.ll @@ -995,9 +995,9 @@ ; X86-NOBMI-LABEL: bzhi64_b0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB15_2 @@ -1013,21 +1013,19 @@ ; ; X86-BMI1NOTBM-LABEL: bzhi64_b0: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: movl $-1, %edx +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB15_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB15_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b0: @@ -1078,9 +1076,9 @@ ; X86-NOBMI-LABEL: bzhi64_b1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB16_2 @@ -1096,21 +1094,19 @@ ; ; X86-BMI1NOTBM-LABEL: bzhi64_b1_indexzext: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: movl $-1, %edx +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB16_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: 
movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB16_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext: @@ -1166,9 +1162,9 @@ ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB17_2 @@ -1188,9 +1184,9 @@ ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: shll %cl, %esi +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB17_2 @@ -1255,9 +1251,9 @@ ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB18_2 @@ -1277,9 +1273,9 @@ ; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: movl $-1, %esi ; X86-BMI1NOTBM-NEXT: shll %cl, %esi +; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB18_2 @@ -1345,9 +1341,9 @@ ; X86-NOBMI-LABEL: bzhi64_b4_commutative: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shldl %cl, %edx, %edx ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB19_2 @@ -1363,21 +1359,19 @@ ; ; X86-BMI1NOTBM-LABEL: bzhi64_b4_commutative: ; X86-BMI1NOTBM: # %bb.0: -; X86-BMI1NOTBM-NEXT: pushl %esi ; X86-BMI1NOTBM-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1NOTBM-NEXT: movl $-1, %eax -; X86-BMI1NOTBM-NEXT: movl $-1, %esi -; X86-BMI1NOTBM-NEXT: shll %cl, %esi -; X86-BMI1NOTBM-NEXT: shldl %cl, %eax, %eax +; X86-BMI1NOTBM-NEXT: shll %cl, %eax +; X86-BMI1NOTBM-NEXT: movl $-1, %edx +; X86-BMI1NOTBM-NEXT: shldl %cl, %edx, %edx ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB19_2 ; X86-BMI1NOTBM-NEXT: # %bb.1: -; X86-BMI1NOTBM-NEXT: movl %esi, %eax -; X86-BMI1NOTBM-NEXT: xorl %esi, %esi +; X86-BMI1NOTBM-NEXT: movl %eax, %edx +; X86-BMI1NOTBM-NEXT: xorl %eax, %eax ; X86-BMI1NOTBM-NEXT: .LBB19_2: -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %edx -; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %esi, %eax -; X86-BMI1NOTBM-NEXT: popl %esi +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx +; X86-BMI1NOTBM-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI1NOTBM-NEXT: retl ; ; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative: 
@@ -1695,9 +1689,9 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB25_2 @@ -1713,9 +1707,9 @@ ; X86-BMI1NOTBM: # %bb.0: ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB25_2 @@ -1775,9 +1769,9 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB26_2 @@ -1793,9 +1787,9 @@ ; X86-BMI1NOTBM: # %bb.0: ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB26_2 @@ -1860,9 +1854,9 @@ ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB27_2 @@ -1881,9 +1875,9 @@ ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB27_2 @@ -1950,9 +1944,9 @@ ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movb $64, %cl ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB28_2 @@ -1971,9 +1965,9 @@ ; X86-BMI1NOTBM-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1NOTBM-NEXT: movb $64, %cl ; X86-BMI1NOTBM-NEXT: subb {{[0-9]+}}(%esp), %cl -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB28_2 @@ -2041,9 +2035,9 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl $64, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shrl %cl, %edx +; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shrdl %cl, %eax, %eax ; X86-NOBMI-NEXT: testb $32, %cl ; X86-NOBMI-NEXT: je .LBB29_2 @@ -2059,9 +2053,9 @@ ; X86-BMI1NOTBM: # %bb.0: ; 
X86-BMI1NOTBM-NEXT: movl $64, %ecx ; X86-BMI1NOTBM-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: movl $-1, %edx ; X86-BMI1NOTBM-NEXT: shrl %cl, %edx +; X86-BMI1NOTBM-NEXT: movl $-1, %eax ; X86-BMI1NOTBM-NEXT: shrdl %cl, %eax, %eax ; X86-BMI1NOTBM-NEXT: testb $32, %cl ; X86-BMI1NOTBM-NEXT: je .LBB29_2 Index: llvm/test/CodeGen/X86/fast-isel-mem.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-mem.ll +++ llvm/test/CodeGen/X86/fast-isel-mem.ll @@ -36,8 +36,8 @@ store i32 (...)** getelementptr ([4 x i32 (...)*], [4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4 ret void ; CHECK: _t: -; CHECK: xorl %eax, %eax -; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx +; CHECK: movl L_LotsStuff$non_lazy_ptr, %e{{..}} +; CHECK: xorl %e{{..}}, %e{{..}} ; ATOM: _t: ; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}} Index: llvm/test/CodeGen/X86/funnel-shift.ll =================================================================== --- llvm/test/CodeGen/X86/funnel-shift.ll +++ llvm/test/CodeGen/X86/funnel-shift.ll @@ -296,8 +296,8 @@ ; X64-AVX2-LABEL: fshr_i37: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movq %rdx, %r8 -; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF ; X64-AVX2-NEXT: movq %rsi, %r9 +; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF ; X64-AVX2-NEXT: andq %rax, %r9 ; X64-AVX2-NEXT: andq %rax, %r8 ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rcx # imm = 0xDD67C8A60DD67C8B Index: llvm/test/CodeGen/X86/hoist-spill.ll =================================================================== --- llvm/test/CodeGen/X86/hoist-spill.ll +++ llvm/test/CodeGen/X86/hoist-spill.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s | FileCheck %s ; Check no spills to the same stack slot after hoisting. +; There should only be one spill here. 
+ ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp) -; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp) +; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/CodeGen/X86/instr-symbols.mir =================================================================== --- llvm/test/CodeGen/X86/instr-symbols.mir +++ llvm/test/CodeGen/X86/instr-symbols.mir @@ -60,10 +60,10 @@ %3:gr64 = MOV64ri32 %4:gr64 = MOV64ri32 %5:gr64 = MOV64ri32 - ; CHECK: movq $.Lpre_f, %{{.*}} - ; CHECK-NEXT: movq $.Lpost_f, %{{.*}} - ; CHECK-NEXT: movq $.Lpre_g, %{{.*}} - ; CHECK-NEXT: movq $.Lpost_h, %{{.*}} + ; CHECK: movq $.Lpre_f, %{{.*}} + ; CHECK: movq $.Lpost_f, %{{.*}} + ; CHECK: movq $.Lpre_g, %{{.*}} + ; CHECK: movq $.Lpost_h, %{{.*}} %6:gr64 = ADD64rr killed %2, killed %3, implicit-def $eflags %7:gr64 = ADD64rr killed %4, killed %5, implicit-def $eflags Index: llvm/test/CodeGen/X86/known-bits.ll =================================================================== --- llvm/test/CodeGen/X86/known-bits.ll +++ llvm/test/CodeGen/X86/known-bits.ll @@ -26,29 +26,29 @@ ; X32-NEXT: vpand %xmm2, %xmm0, %xmm0 ; X32-NEXT: vpextrd $1, %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: vpextrd $1, %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: vmovd %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X32-NEXT: vmovd %xmm0, (%esp) # 4-byte Folded Spill ; X32-NEXT: vpextrd $2, %xmm1, %edi -; X32-NEXT: vpextrd $2, %xmm0, %esi +; X32-NEXT: vpextrd $2, %xmm0, %ebp ; X32-NEXT: vpextrd $3, %xmm1, %ebx -; X32-NEXT: vpextrd $3, %xmm0, %ebp +; X32-NEXT: vpextrd $3, %xmm0, %esi +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB0_1: # %CF ; X32-NEXT: # =>This Loop Header: Depth=1 ; X32-NEXT: # Child Loop BB0_2 Depth 2 -; X32-NEXT: xorl %edx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: divl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: xorl %edx, %edx +; X32-NEXT: divl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: divl (%esp) # 4-byte Folded Reload ; X32-NEXT: xorl %edx, %edx +; X32-NEXT: divl (%esp) # 4-byte Folded Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: divl %esi ; X32-NEXT: xorl %edx, %edx -; X32-NEXT: movl %ebx, %eax ; X32-NEXT: divl %ebp +; X32-NEXT: movl %ebx, %eax +; X32-NEXT: xorl %edx, %edx +; X32-NEXT: divl %esi ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB0_2: # %CF237 ; X32-NEXT: # Parent Loop BB0_1 Depth=1 @@ -76,34 +76,34 @@ ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0 ; X64-NEXT: vpextrd $1, %xmm1, %r8d ; X64-NEXT: vpextrd $1, %xmm0, %r9d -; X64-NEXT: xorl %esi, %esi ; X64-NEXT: vmovd %xmm1, %r10d ; X64-NEXT: vmovd %xmm0, %r11d ; X64-NEXT: vpextrd $2, %xmm1, %edi -; X64-NEXT: vpextrd $2, %xmm0, %ebx +; X64-NEXT: vpextrd $2, %xmm0, %esi ; X64-NEXT: vpextrd $3, %xmm1, %ecx -; X64-NEXT: vpextrd $3, %xmm0, %ebp +; X64-NEXT: vpextrd $3, %xmm0, %ebx +; X64-NEXT: xorl %ebp, %ebp ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB0_1: # %CF ; X64-NEXT: # =>This Loop Header: Depth=1 ; X64-NEXT: # Child Loop BB0_2 Depth 2 -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: movl %r8d, %eax -; X64-NEXT: divl %r9d ; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %r9d ; X64-NEXT: movl %r10d, %eax -; 
X64-NEXT: divl %r11d ; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %r11d ; X64-NEXT: movl %edi, %eax -; X64-NEXT: divl %ebx ; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %esi ; X64-NEXT: movl %ecx, %eax -; X64-NEXT: divl %ebp +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: divl %ebx ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB0_2: # %CF237 ; X64-NEXT: # Parent Loop BB0_1 Depth=1 ; X64-NEXT: # => This Inner Loop Header: Depth=2 -; X64-NEXT: testb %sil, %sil +; X64-NEXT: testb %bpl, %bpl ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: jmp .LBB0_1 BB: @@ -154,15 +154,15 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl $-1024, %esi # imm = 0xFC00 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: andl %esi, %edi -; X32-NEXT: andl {{[0-9]+}}(%esp), %esi -; X32-NEXT: addl %edi, %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl $-1024, %edi # imm = 0xFC00 +; X32-NEXT: andl %edi, %esi +; X32-NEXT: andl {{[0-9]+}}(%esp), %edi +; X32-NEXT: addl %esi, %edi ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: shldl $22, %edx, %ecx -; X32-NEXT: shldl $22, %esi, %edx +; X32-NEXT: shldl $22, %edi, %edx ; X32-NEXT: movl %edx, 8(%eax) ; X32-NEXT: movl %ecx, 12(%eax) ; X32-NEXT: movl $0, 4(%eax) Index: llvm/test/CodeGen/X86/lsr-static-addr.ll =================================================================== --- llvm/test/CodeGen/X86/lsr-static-addr.ll +++ llvm/test/CodeGen/X86/lsr-static-addr.ll @@ -1,8 +1,8 @@ ; RUN: llc -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s ; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s -; CHECK: xorl %eax, %eax ; CHECK: movsd .LCPI0_0(%rip), %xmm0 +; CHECK: xorl %eax, %eax ; CHECK: align ; CHECK-NEXT: BB0_2: ; CHECK-NEXT: movsd A(,%rax,8) Index: llvm/test/CodeGen/X86/machine-cse.ll =================================================================== --- llvm/test/CodeGen/X86/machine-cse.ll +++ llvm/test/CodeGen/X86/machine-cse.ll @@ -63,8 +63,8 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: leal (%rsi,%rsi,2), %esi -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: # kill: def $edi killed $edi killed $rdi +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq printf ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: .p2align 4, 0x90 @@ -108,8 +108,8 @@ define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp { ; CHECK-LABEL: cross_mbb_phys_cse: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: movl $1, %eax ; CHECK-NEXT: ja .LBB2_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: sbbl %eax, %eax Index: llvm/test/CodeGen/X86/madd.ll =================================================================== --- llvm/test/CodeGen/X86/madd.ll +++ llvm/test/CodeGen/X86/madd.ll @@ -89,8 +89,8 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB1_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -197,9 +197,9 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB2_1: # %vector.body ; 
SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -229,8 +229,8 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB2_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -265,8 +265,8 @@ ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB2_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -353,11 +353,11 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm8, %xmm8 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB3_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -399,9 +399,9 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8 -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB3_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -452,9 +452,9 @@ ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB3_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -485,8 +485,8 @@ ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: movl %edx, %eax ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: xorl %ecx, %ecx ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: xorl %ecx, %ecx ; AVX512F-NEXT: .p2align 4, 0x90 ; AVX512F-NEXT: .LBB3_1: # %vector.body ; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1 @@ -517,8 +517,8 @@ ; AVX512BW: # %bb.0: # %entry ; AVX512BW-NEXT: movl %edx, %eax ; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: xorl %ecx, %ecx ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: xorl %ecx, %ecx ; AVX512BW-NEXT: .p2align 4, 0x90 ; AVX512BW-NEXT: .LBB3_1: # %vector.body ; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1 @@ -666,8 +666,8 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB5_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -780,9 +780,9 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB6_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -820,8 +820,8 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB6_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: 
Depth=1 @@ -856,8 +856,8 @@ ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB6_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -946,11 +946,11 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm8, %xmm8 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm9, %xmm9 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB7_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1008,9 +1008,9 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB7_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1061,9 +1061,9 @@ ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB7_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1096,8 +1096,8 @@ ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: movl %edx, %eax ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: xorl %ecx, %ecx ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: xorl %ecx, %ecx ; AVX512F-NEXT: .p2align 4, 0x90 ; AVX512F-NEXT: .LBB7_1: # %vector.body ; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1130,8 +1130,8 @@ ; AVX512BW: # %bb.0: # %entry ; AVX512BW-NEXT: movl %edx, %eax ; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: xorl %ecx, %ecx ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: xorl %ecx, %ecx ; AVX512BW-NEXT: .p2align 4, 0x90 ; AVX512BW-NEXT: .LBB7_1: # %vector.body ; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1275,8 +1275,8 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB9_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1396,10 +1396,10 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pxor %xmm2, %xmm2 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB10_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1441,8 +1441,8 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB10_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1487,8 +1487,8 @@ ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB10_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop 
Header: Depth=1 @@ -1581,7 +1581,6 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm8, %xmm8 -; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pxor %xmm9, %xmm9 ; SSE2-NEXT: pxor %xmm10, %xmm10 @@ -1589,6 +1588,7 @@ ; SSE2-NEXT: pxor %xmm6, %xmm6 ; SSE2-NEXT: pxor %xmm5, %xmm5 ; SSE2-NEXT: pxor %xmm7, %xmm7 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB11_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1654,10 +1654,10 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vpxor %xmm8, %xmm8, %xmm8 -; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpxor %xmm9, %xmm9, %xmm9 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB11_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1728,10 +1728,10 @@ ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB11_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1772,8 +1772,8 @@ ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB11_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -2416,8 +2416,8 @@ ; ; SKX_LARGE-LABEL: test_global_array: ; SKX_LARGE: # %bb.0: -; SKX_LARGE-NEXT: movabsq $glob_array, %rax ; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1 +; SKX_LARGE-NEXT: movabsq $glob_array, %rax ; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} ; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0 ; SKX_LARGE-NEXT: retq Index: llvm/test/CodeGen/X86/memset-nonzero.ll =================================================================== --- llvm/test/CodeGen/X86/memset-nonzero.ll +++ llvm/test/CodeGen/X86/memset-nonzero.ll @@ -144,8 +144,8 @@ ; SSE: # %bb.0: ; SSE-NEXT: pushq %rax ; SSE-NEXT: .cfi_def_cfa_offset 16 -; SSE-NEXT: movl $42, %esi ; SSE-NEXT: movl $256, %edx # imm = 0x100 +; SSE-NEXT: movl $42, %esi ; SSE-NEXT: callq memset ; SSE-NEXT: popq %rax ; SSE-NEXT: .cfi_def_cfa_offset 8 Index: llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll =================================================================== --- llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll +++ llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll @@ -28,7 +28,7 @@ declare i32 @test_function(%class.C*, i8 signext, i8 signext, i8 signext, ...) 
; CHECK-LABEL: test_without_debug ; CHECK: movl [[A:%[a-z]+]], [[B:%[a-z]+]] -; CHECK-NEXT: movl [[A]], [[C:%[a-z]+]] +; CHECK: movl [[A]], [[C:%[a-z]+]] define void @test_without_debug() { entry: @@ -42,7 +42,7 @@ } ; CHECK-LABEL: test_with_debug ; CHECK: movl [[A]], [[B]] -; CHECK-NEXT: movl [[A]], [[C]] +; CHECK: movl [[A]], [[C]] define void @test_with_debug() !dbg !17 { entry: Index: llvm/test/CodeGen/X86/mul-constant-result.ll =================================================================== --- llvm/test/CodeGen/X86/mul-constant-result.ll +++ llvm/test/CodeGen/X86/mul-constant-result.ll @@ -249,8 +249,8 @@ ; X64-HSW: # %bb.0: ; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi ; X64-HSW-NEXT: cmpl $1, %esi -; X64-HSW-NEXT: movl $1, %ecx ; X64-HSW-NEXT: movl %esi, %eax +; X64-HSW-NEXT: movl $1, %ecx ; X64-HSW-NEXT: cmovgl %ecx, %eax ; X64-HSW-NEXT: testl %esi, %esi ; X64-HSW-NEXT: cmovel %ecx, %eax Index: llvm/test/CodeGen/X86/mul-i256.ll =================================================================== --- llvm/test/CodeGen/X86/mul-i256.ll +++ llvm/test/CodeGen/X86/mul-i256.ll @@ -57,9 +57,9 @@ ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %ebx -; X32-NEXT: xorl %edx, %edx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax +; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %edx, %ebp ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill Index: llvm/test/CodeGen/X86/patchpoint.ll =================================================================== --- llvm/test/CodeGen/X86/patchpoint.ll +++ llvm/test/CodeGen/X86/patchpoint.ll @@ -103,8 +103,8 @@ entry: ; CHECK-LABEL: test_patchpoint_with_attributes: ; CHECK: movl $42, %edi -; CHECK: xorl %r10d, %r10d ; CHECK: movl $17, %esi +; CHECK: xorl %r10d, %r10d ; CHECK: movabsq $_consume_attributes, %r11 ; CHECK-NEXT: callq *%r11 ; CHECK-NEXT: xchgw %ax, %ax Index: llvm/test/CodeGen/X86/popcnt.ll =================================================================== --- llvm/test/CodeGen/X86/popcnt.ll +++ llvm/test/CodeGen/X86/popcnt.ll @@ -215,12 +215,12 @@ ; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 ; X64-NEXT: andq %rax, %rcx ; X64-NEXT: subq %rcx, %rdi -; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: andq %rax, %rcx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: shrq $2, %rdi -; X64-NEXT: andq %rax, %rdi -; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: andq %rcx, %rdi +; X64-NEXT: addq %rax, %rdi ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $4, %rax ; X64-NEXT: leaq (%rax,%rdi), %rax Index: llvm/test/CodeGen/X86/pr29170.ll =================================================================== --- llvm/test/CodeGen/X86/pr29170.ll +++ llvm/test/CodeGen/X86/pr29170.ll @@ -13,12 +13,12 @@ ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_3 ; CHECK-NEXT: # %bb.1: # %go -; CHECK-NEXT: movl $-1, %ecx -; CHECK-NEXT: movsbl b, %edx -; CHECK-NEXT: notl %ecx -; CHECK-NEXT: movzwl %dx, %edx -; CHECK-NEXT: cmpl $-1, %edx -; CHECK-NEXT: sbbl %ecx, %eax +; CHECK-NEXT: movsbl b, %ecx +; CHECK-NEXT: movl $-1, %edx +; CHECK-NEXT: notl %edx +; CHECK-NEXT: movzwl %cx, %ecx +; CHECK-NEXT: cmpl $-1, %ecx +; CHECK-NEXT: sbbl %edx, %eax ; CHECK-NEXT: jge .LBB0_3 ; CHECK-NEXT: # %bb.2: # %if.then ; CHECK-NEXT: movl $42, %eax Index: 
llvm/test/CodeGen/X86/pr36865.ll =================================================================== --- llvm/test/CodeGen/X86/pr36865.ll +++ llvm/test/CodeGen/X86/pr36865.ll @@ -7,8 +7,8 @@ ; CHECK-NEXT: subq $424, %rsp # imm = 0x1A8 ; CHECK-NEXT: .cfi_def_cfa_offset 432 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi -; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: movl $400, %edx # imm = 0x190 +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq memset ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movl (%rax), %ecx Index: llvm/test/CodeGen/X86/pr38795.ll =================================================================== --- llvm/test/CodeGen/X86/pr38795.ll +++ llvm/test/CodeGen/X86/pr38795.ll @@ -22,12 +22,12 @@ ; CHECK-NEXT: .cfi_offset %edi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 -; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: # implicit-def: $esi ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $ah ; CHECK-NEXT: # implicit-def: $al ; CHECK-NEXT: # implicit-def: $edx +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_16: # %for.inc @@ -127,8 +127,8 @@ ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: jne .LBB0_11 ; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_20: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 Index: llvm/test/CodeGen/X86/pr38865.ll =================================================================== --- llvm/test/CodeGen/X86/pr38865.ll +++ llvm/test/CodeGen/X86/pr38865.ll @@ -15,11 +15,11 @@ ; CHECK-NEXT: subl $528, %esp # encoding: [0x81,0xec,0x10,0x02,0x00,0x00] ; CHECK-NEXT: # imm = 0x210 ; CHECK-NEXT: leal {{[0-9]+}}(%rsp), %ebx # encoding: [0x8d,0x9c,0x24,0x08,0x01,0x00,0x00] +; CHECK-NEXT: movl %ebx, %edi # encoding: [0x89,0xdf] ; CHECK-NEXT: movl $c, %esi # encoding: [0xbe,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 1, value: c, kind: FK_Data_4 ; CHECK-NEXT: movl $260, %edx # encoding: [0xba,0x04,0x01,0x00,0x00] ; CHECK-NEXT: # imm = 0x104 -; CHECK-NEXT: movl %ebx, %edi # encoding: [0x89,0xdf] ; CHECK-NEXT: callq memcpy # encoding: [0xe8,A,A,A,A] ; CHECK-NEXT: # fixup A - offset: 1, value: memcpy-4, kind: FK_PCRel_4 ; CHECK-NEXT: movl $32, %ecx # encoding: [0xb9,0x20,0x00,0x00,0x00] Index: llvm/test/CodeGen/X86/pr39391.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/pr39391.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -O2 -mtriple=i686-unknown-linux-gnu -o - | FileCheck %s + +@f = global i8* zeroinitializer + +define void @g() #0 { +; CHECK-LABEL: g: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %ebx, -12 +; CHECK-NEXT: movl f, %esi +; CHECK-NEXT: movb (%esi), %al +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: lock cmpxchg8b (%esi) +; CHECK-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded 
Reload +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %k.end +; CHECK-NEXT: .LBB0_1: # %. +; CHECK-NEXT: calll m +entry: + %0 = load i8*, i8** @f + %1 = load atomic i8, i8* %0 monotonic, align 1 + %d.h.h.h.h.h = bitcast i8* %0 to i64* + %2 = load atomic i64, i64* %d.h.h.h.h.h monotonic, align 8 + %j.h = icmp eq i8 %1, 0 + br i1 %j.h, label %k.end, label %. + +.: ; preds = %., %entry + %3 = call i32 @m() + unreachable + +k.end: ; preds = %entry + unreachable +} + +declare i32 @m() + +attributes #0 = { "no-frame-pointer-elim-non-leaf" } Index: llvm/test/CodeGen/X86/pr9517.ll =================================================================== --- llvm/test/CodeGen/X86/pr9517.ll +++ llvm/test/CodeGen/X86/pr9517.ll @@ -22,7 +22,7 @@ ret i16 %v } -; The asm call prevents the merging the loads here. +; The asm call prevents the merging the loads here. define i16 @unify_through_trival_asm_w_memory_clobber() { ; CHECK-LABEL: unify_through_trival_asm_w_memory_clobber: ; CHECK: # %bb.0: Index: llvm/test/CodeGen/X86/required-vector-width.ll =================================================================== --- llvm/test/CodeGen/X86/required-vector-width.ll +++ llvm/test/CodeGen/X86/required-vector-width.ll @@ -165,9 +165,9 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB8_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -236,8 +236,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB9_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -304,8 +304,8 @@ ; CHECK-LABEL: sad_16i8_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: movq $-1024, %rax # imm = 0xFC00 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: movq $-1024, %rax # imm = 0xFC00 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB10_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/X86/sad.ll =================================================================== --- llvm/test/CodeGen/X86/sad.ll +++ llvm/test/CodeGen/X86/sad.ll @@ -12,8 +12,8 @@ ; SSE2-LABEL: sad_16i8: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB0_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -37,8 +37,8 @@ ; AVX1-LABEL: sad_16i8: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB0_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -65,8 +65,8 @@ ; AVX2-LABEL: sad_16i8: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB0_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -152,7 +152,6 @@ ; 
SSE2-LABEL: sad_32i8: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: pxor %xmm12, %xmm12 -; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: pxor %xmm0, %xmm0 @@ -165,6 +164,7 @@ ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: pxor %xmm14, %xmm14 +; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB1_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -288,8 +288,8 @@ ; AVX1-LABEL: sad_32i8: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB1_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -328,8 +328,8 @@ ; AVX2-LABEL: sad_32i8: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB1_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -355,8 +355,8 @@ ; AVX512-LABEL: sad_32i8: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB1_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 @@ -422,7 +422,6 @@ ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: subq $200, %rsp ; SSE2-NEXT: pxor %xmm14, %xmm14 -; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: pxor %xmm0, %xmm0 @@ -455,6 +454,7 @@ ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB2_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -717,7 +717,6 @@ ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: subq $24, %rsp ; AVX1-NEXT: vpxor %xmm14, %xmm14, %xmm14 -; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX1-NEXT: vpxor %xmm15, %xmm15, %xmm15 ; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vpxor %xmm13, %xmm13, %xmm13 @@ -725,6 +724,7 @@ ; AVX1-NEXT: vpxor %xmm9, %xmm9, %xmm9 ; AVX1-NEXT: vpxor %xmm10, %xmm10, %xmm10 ; AVX1-NEXT: vpxor %xmm12, %xmm12, %xmm12 +; AVX1-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX1-NEXT: .p2align 4, 0x90 ; AVX1-NEXT: .LBB2_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 @@ -872,7 +872,6 @@ ; AVX2-LABEL: sad_avx64i8: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -880,6 +879,7 @@ ; AVX2-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX2-NEXT: vpxor %xmm5, %xmm5, %xmm5 ; AVX2-NEXT: vpxor %xmm7, %xmm7, %xmm7 +; AVX2-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB2_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -949,10 +949,10 @@ ; AVX512F-LABEL: sad_avx64i8: ; AVX512F: # %bb.0: # %entry ; 
AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX512F-NEXT: .p2align 4, 0x90 ; AVX512F-NEXT: .LBB2_1: # %vector.body ; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1 @@ -997,8 +997,8 @@ ; AVX512BW-LABEL: sad_avx64i8: ; AVX512BW: # %bb.0: # %entry ; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX512BW-NEXT: .p2align 4, 0x90 ; AVX512BW-NEXT: .LBB2_1: # %vector.body ; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1067,9 +1067,9 @@ ; SSE2-LABEL: sad_2i8: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: movq $-1024, %rax # imm = 0xFC00 -; SSE2-NEXT: movl $65535, %ecx # imm = 0xFFFF -; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB3_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1090,8 +1090,8 @@ ; AVX-LABEL: sad_2i8: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX-NEXT: movq $-1024, %rax # imm = 0xFC00 ; AVX-NEXT: .p2align 4, 0x90 ; AVX-NEXT: .LBB3_1: # %vector.body ; AVX-NEXT: # =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/X86/sadd_sat.ll =================================================================== --- llvm/test/CodeGen/X86/sadd_sat.ll +++ llvm/test/CodeGen/X86/sadd_sat.ll @@ -10,8 +10,8 @@ define i32 @func(i32 %x, i32 %y) { ; CHECK-LABEL: func: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: addl %esi, %ecx ; CHECK-NEXT: setns %al ; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF @@ -26,8 +26,8 @@ ; CHECK32-NEXT: .cfi_offset %esi, -8 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK32-NEXT: xorl %ecx, %ecx ; CHECK32-NEXT: movl %eax, %esi +; CHECK32-NEXT: xorl %ecx, %ecx ; CHECK32-NEXT: addl %edx, %esi ; CHECK32-NEXT: setns %cl ; CHECK32-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF @@ -43,8 +43,8 @@ define i64 @func2(i64 %x, i64 %y) { ; CHECK-LABEL: func2: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: setns %cl ; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF @@ -151,46 +151,46 @@ ; CHECK-NEXT: movd %xmm2, %ecx ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] ; CHECK-NEXT: movd %xmm2, %r8d -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: movl %r8d, %esi -; CHECK-NEXT: addl %ecx, %esi -; CHECK-NEXT: setns %dl -; CHECK-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF -; CHECK-NEXT: addl %ecx, %r8d -; CHECK-NEXT: cmovol %edx, %r8d -; CHECK-NEXT: movd %xmm1, %edx -; CHECK-NEXT: movd %xmm0, %ecx +; CHECK-NEXT: movl %r8d, %edx ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: movl %ecx, %edi -; CHECK-NEXT: addl %edx, %edi +; CHECK-NEXT: addl %ecx, %edx ; CHECK-NEXT: setns %sil ; CHECK-NEXT: addl $2147483647, %esi # imm = 0x7FFFFFFF +; CHECK-NEXT: addl %ecx, %r8d +; CHECK-NEXT: cmovol %esi, %r8d +; CHECK-NEXT: movd %xmm1, %edx +; CHECK-NEXT: movd %xmm0, %ecx +; CHECK-NEXT: movl %ecx, %esi +; 
CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: addl %edx, %esi +; CHECK-NEXT: setns %dil +; CHECK-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF ; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: cmovol %esi, %ecx +; CHECK-NEXT: cmovol %edi, %ecx ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; CHECK-NEXT: movd %xmm2, %edx ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; CHECK-NEXT: movd %xmm2, %eax -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movd %xmm2, %esi +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl %edx, %edi +; CHECK-NEXT: setns %al +; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: addl %edx, %esi -; CHECK-NEXT: setns %dil -; CHECK-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF -; CHECK-NEXT: addl %edx, %eax -; CHECK-NEXT: cmovol %edi, %eax +; CHECK-NEXT: cmovol %eax, %esi ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] ; CHECK-NEXT: movd %xmm1, %r9d ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; CHECK-NEXT: movd %xmm0, %edx -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: addl %r9d, %esi -; CHECK-NEXT: setns %dil -; CHECK-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl %r9d, %edi +; CHECK-NEXT: setns %al +; CHECK-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF ; CHECK-NEXT: addl %r9d, %edx -; CHECK-NEXT: cmovol %edi, %edx +; CHECK-NEXT: cmovol %eax, %edx ; CHECK-NEXT: movd %edx, %xmm0 -; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: movd %esi, %xmm1 ; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; CHECK-NEXT: movd %ecx, %xmm0 ; CHECK-NEXT: movd %r8d, %xmm2 @@ -214,8 +214,8 @@ ; CHECK32-NEXT: .cfi_offset %ebp, -8 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: movl %ecx, %esi +; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: addl %edx, %esi ; CHECK32-NEXT: setns %al ; CHECK32-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF @@ -223,8 +223,8 @@ ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK32-NEXT: cmovol %eax, %ecx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: movl %edx, %edi +; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: addl %esi, %edi ; CHECK32-NEXT: setns %al ; CHECK32-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF @@ -232,8 +232,8 @@ ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK32-NEXT: cmovol %eax, %edx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: movl %esi, %ebx +; CHECK32-NEXT: xorl %eax, %eax ; CHECK32-NEXT: addl %edi, %ebx ; CHECK32-NEXT: setns %al ; CHECK32-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF @@ -241,8 +241,8 @@ ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi ; CHECK32-NEXT: cmovol %eax, %esi ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: xorl %ebx, %ebx ; CHECK32-NEXT: movl %edi, %ebp +; CHECK32-NEXT: xorl %ebx, %ebx ; CHECK32-NEXT: addl %eax, %ebp ; CHECK32-NEXT: setns %bl ; CHECK32-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF Index: llvm/test/CodeGen/X86/scalar_widen_div.ll =================================================================== --- llvm/test/CodeGen/X86/scalar_widen_div.ll +++ llvm/test/CodeGen/X86/scalar_widen_div.ll @@ -237,12 +237,12 @@ ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq %rcx ; CHECK-NEXT: movq %rax, %rcx -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: xorl %edx, 
%edx ; CHECK-NEXT: divq %r8 ; CHECK-NEXT: movq %rax, %rsi -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %r10, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq %r9 ; CHECK-NEXT: movq %rax, %rdi ; CHECK-NEXT: movq %rcx, %rax @@ -372,22 +372,22 @@ ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, %xmm0 -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, %xmm1 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %r8, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, %xmm0 -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, %xmm2 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: movq %r9, %rax +; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divq {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, 32(%rdi) ; CHECK-NEXT: movdqa %xmm2, 16(%rdi) Index: llvm/test/CodeGen/X86/scheduler-backtracking.ll =================================================================== --- llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -26,8 +26,8 @@ ; ILP-NEXT: adcq $0, %r8 ; ILP-NEXT: leal 1(%rsi,%rsi), %edi ; ILP-NEXT: movl $1, %ebp -; ILP-NEXT: xorl %r14d, %r14d ; ILP-NEXT: movl %edi, %ecx +; ILP-NEXT: xorl %r14d, %r14d ; ILP-NEXT: shldq %cl, %rbp, %r14 ; ILP-NEXT: movl $1, %r11d ; ILP-NEXT: shlq %cl, %r11 @@ -38,8 +38,8 @@ ; ILP-NEXT: shlq %cl, %r13 ; ILP-NEXT: movl $1, %r12d ; ILP-NEXT: shrdq %cl, %rax, %r12 -; ILP-NEXT: xorl %r15d, %r15d ; ILP-NEXT: movl %edi, %ecx +; ILP-NEXT: xorl %r15d, %r15d ; ILP-NEXT: shldq %cl, %r15, %r15 ; ILP-NEXT: movq %rsi, %rbx ; ILP-NEXT: shrdq %cl, %rdx, %rbx @@ -100,7 +100,6 @@ ; ; HYBRID-LABEL: test1: ; HYBRID: # %bb.0: -; HYBRID-NEXT: pushq %rbp ; HYBRID-NEXT: pushq %r15 ; HYBRID-NEXT: pushq %r14 ; HYBRID-NEXT: pushq %r13 @@ -112,84 +111,82 @@ ; HYBRID-NEXT: adcq $0, %rdx ; HYBRID-NEXT: adcq $0, %r9 ; HYBRID-NEXT: adcq $0, %r8 -; HYBRID-NEXT: xorl %r10d, %r10d ; HYBRID-NEXT: leal 1(%rsi,%rsi), %edi -; HYBRID-NEXT: xorl %r14d, %r14d ; HYBRID-NEXT: movl %edi, %ecx -; HYBRID-NEXT: shldq %cl, %r14, %r14 +; HYBRID-NEXT: xorl %r15d, %r15d +; HYBRID-NEXT: shldq %cl, %r15, %r15 +; HYBRID-NEXT: xorl %r14d, %r14d ; HYBRID-NEXT: testb $64, %dil -; HYBRID-NEXT: cmovneq %r10, %r14 -; HYBRID-NEXT: movl $1, %ebp +; HYBRID-NEXT: cmovneq %r14, %r15 +; HYBRID-NEXT: movl $1, %r11d ; HYBRID-NEXT: movl $1, %r12d ; HYBRID-NEXT: shlq %cl, %r12 ; HYBRID-NEXT: testb $64, %dil -; HYBRID-NEXT: movq %r12, %r11 -; HYBRID-NEXT: cmovneq %r10, %r11 +; HYBRID-NEXT: movq %r12, %r10 +; HYBRID-NEXT: cmovneq %r14, %r10 ; HYBRID-NEXT: movq %rsi, %rbx ; HYBRID-NEXT: shrdq %cl, %rdx, %rbx ; HYBRID-NEXT: shrq %cl, %rdx ; HYBRID-NEXT: testb $64, %dil ; HYBRID-NEXT: cmoveq %rbx, %rdx -; HYBRID-NEXT: xorl %r15d, %r15d -; HYBRID-NEXT: shldq %cl, %rbp, %r15 +; HYBRID-NEXT: xorl %r13d, %r13d +; HYBRID-NEXT: shldq %cl, %r11, %r13 ; HYBRID-NEXT: testb $64, %dil -; HYBRID-NEXT: cmovneq %r12, %r15 +; HYBRID-NEXT: cmovneq %r12, %r13 ; HYBRID-NEXT: movb $-128, %cl ; HYBRID-NEXT: subb %dil, %cl -; HYBRID-NEXT: movq %r9, %r13 -; HYBRID-NEXT: shlq %cl, %r13 +; HYBRID-NEXT: movq %r9, %rbx +; HYBRID-NEXT: shlq %cl, %rbx ; HYBRID-NEXT: movl $1, %r12d -; HYBRID-NEXT: shrdq %cl, %r10, %r12 +; 
HYBRID-NEXT: shrdq %cl, %r14, %r12 ; HYBRID-NEXT: testb $64, %cl -; HYBRID-NEXT: cmovneq %r10, %r12 -; HYBRID-NEXT: cmovneq %r10, %r13 -; HYBRID-NEXT: orl %edx, %r13d +; HYBRID-NEXT: cmovneq %r14, %r12 +; HYBRID-NEXT: cmovneq %r14, %rbx +; HYBRID-NEXT: orl %edx, %ebx ; HYBRID-NEXT: movl %edi, %ecx ; HYBRID-NEXT: addb $-128, %cl ; HYBRID-NEXT: shrdq %cl, %r8, %r9 ; HYBRID-NEXT: shrq %cl, %r8 ; HYBRID-NEXT: xorl %edx, %edx -; HYBRID-NEXT: shldq %cl, %rbp, %rdx -; HYBRID-NEXT: shlq %cl, %rbp +; HYBRID-NEXT: shldq %cl, %r11, %rdx +; HYBRID-NEXT: shlq %cl, %r11 ; HYBRID-NEXT: testb $64, %cl -; HYBRID-NEXT: cmovneq %rbp, %rdx +; HYBRID-NEXT: cmovneq %r11, %rdx ; HYBRID-NEXT: cmoveq %r9, %r8 -; HYBRID-NEXT: cmovneq %r10, %rbp +; HYBRID-NEXT: cmovneq %r14, %r11 ; HYBRID-NEXT: testb %dil, %dil ; HYBRID-NEXT: jns .LBB0_2 ; HYBRID-NEXT: # %bb.1: -; HYBRID-NEXT: movl %r8d, %r13d +; HYBRID-NEXT: movl %r8d, %ebx ; HYBRID-NEXT: .LBB0_2: ; HYBRID-NEXT: je .LBB0_4 ; HYBRID-NEXT: # %bb.3: -; HYBRID-NEXT: movl %r13d, %esi +; HYBRID-NEXT: movl %ebx, %esi ; HYBRID-NEXT: .LBB0_4: -; HYBRID-NEXT: cmovsq %r10, %r15 -; HYBRID-NEXT: cmovnsq %r12, %rbp -; HYBRID-NEXT: cmoveq %r10, %rbp -; HYBRID-NEXT: cmovnsq %r14, %rdx -; HYBRID-NEXT: cmoveq %r10, %rdx -; HYBRID-NEXT: cmovsq %r10, %r11 +; HYBRID-NEXT: cmovsq %r14, %r13 +; HYBRID-NEXT: cmovnsq %r12, %r11 +; HYBRID-NEXT: cmoveq %r14, %r11 +; HYBRID-NEXT: cmovnsq %r15, %rdx +; HYBRID-NEXT: cmoveq %r14, %rdx +; HYBRID-NEXT: cmovsq %r14, %r10 ; HYBRID-NEXT: testb $1, %sil ; HYBRID-NEXT: cmovneq %rax, %rdx ; HYBRID-NEXT: movq %rdx, 24(%rax) -; HYBRID-NEXT: cmovneq %rax, %rbp -; HYBRID-NEXT: movq %rbp, 16(%rax) -; HYBRID-NEXT: cmovneq %rax, %r15 -; HYBRID-NEXT: movq %r15, 8(%rax) ; HYBRID-NEXT: cmovneq %rax, %r11 -; HYBRID-NEXT: movq %r11, (%rax) +; HYBRID-NEXT: movq %r11, 16(%rax) +; HYBRID-NEXT: cmovneq %rax, %r13 +; HYBRID-NEXT: movq %r13, 8(%rax) +; HYBRID-NEXT: cmovneq %rax, %r10 +; HYBRID-NEXT: movq %r10, (%rax) ; HYBRID-NEXT: popq %rbx ; HYBRID-NEXT: popq %r12 ; HYBRID-NEXT: popq %r13 ; HYBRID-NEXT: popq %r14 ; HYBRID-NEXT: popq %r15 -; HYBRID-NEXT: popq %rbp ; HYBRID-NEXT: retq ; ; BURR-LABEL: test1: ; BURR: # %bb.0: -; BURR-NEXT: pushq %rbp ; BURR-NEXT: pushq %r15 ; BURR-NEXT: pushq %r14 ; BURR-NEXT: pushq %r13 @@ -201,79 +198,78 @@ ; BURR-NEXT: adcq $0, %rdx ; BURR-NEXT: adcq $0, %r9 ; BURR-NEXT: adcq $0, %r8 -; BURR-NEXT: xorl %r10d, %r10d ; BURR-NEXT: leal 1(%rsi,%rsi), %edi -; BURR-NEXT: xorl %r14d, %r14d ; BURR-NEXT: movl %edi, %ecx -; BURR-NEXT: shldq %cl, %r14, %r14 +; BURR-NEXT: xorl %r15d, %r15d +; BURR-NEXT: shldq %cl, %r15, %r15 +; BURR-NEXT: xorl %r14d, %r14d ; BURR-NEXT: testb $64, %dil -; BURR-NEXT: cmovneq %r10, %r14 -; BURR-NEXT: movl $1, %ebp +; BURR-NEXT: cmovneq %r14, %r15 +; BURR-NEXT: movl $1, %r11d ; BURR-NEXT: movl $1, %r12d ; BURR-NEXT: shlq %cl, %r12 ; BURR-NEXT: testb $64, %dil -; BURR-NEXT: movq %r12, %r11 -; BURR-NEXT: cmovneq %r10, %r11 +; BURR-NEXT: movq %r12, %r10 +; BURR-NEXT: cmovneq %r14, %r10 ; BURR-NEXT: movq %rsi, %rbx ; BURR-NEXT: shrdq %cl, %rdx, %rbx ; BURR-NEXT: shrq %cl, %rdx ; BURR-NEXT: testb $64, %dil ; BURR-NEXT: cmoveq %rbx, %rdx -; BURR-NEXT: xorl %r15d, %r15d -; BURR-NEXT: shldq %cl, %rbp, %r15 +; BURR-NEXT: xorl %r13d, %r13d +; BURR-NEXT: shldq %cl, %r11, %r13 ; BURR-NEXT: testb $64, %dil -; BURR-NEXT: cmovneq %r12, %r15 +; BURR-NEXT: cmovneq %r12, %r13 ; BURR-NEXT: movb $-128, %cl ; BURR-NEXT: subb %dil, %cl -; BURR-NEXT: movq %r9, %r13 -; BURR-NEXT: shlq %cl, %r13 +; BURR-NEXT: movq %r9, %rbx +; BURR-NEXT: shlq 
%cl, %rbx ; BURR-NEXT: movl $1, %r12d -; BURR-NEXT: shrdq %cl, %r10, %r12 +; BURR-NEXT: shrdq %cl, %r14, %r12 ; BURR-NEXT: testb $64, %cl -; BURR-NEXT: cmovneq %r10, %r12 -; BURR-NEXT: cmovneq %r10, %r13 -; BURR-NEXT: orl %edx, %r13d +; BURR-NEXT: cmovneq %r14, %r12 +; BURR-NEXT: cmovneq %r14, %rbx +; BURR-NEXT: orl %edx, %ebx ; BURR-NEXT: movl %edi, %ecx ; BURR-NEXT: addb $-128, %cl ; BURR-NEXT: shrdq %cl, %r8, %r9 ; BURR-NEXT: xorl %edx, %edx -; BURR-NEXT: shldq %cl, %rbp, %rdx +; BURR-NEXT: shldq %cl, %r11, %rdx ; BURR-NEXT: shrq %cl, %r8 -; BURR-NEXT: shlq %cl, %rbp +; BURR-NEXT: shlq %cl, %r11 ; BURR-NEXT: testb $64, %cl -; BURR-NEXT: cmovneq %rbp, %rdx +; BURR-NEXT: cmovneq %r11, %rdx ; BURR-NEXT: cmoveq %r9, %r8 -; BURR-NEXT: cmovneq %r10, %rbp +; BURR-NEXT: cmovneq %r14, %r11 ; BURR-NEXT: testb %dil, %dil ; BURR-NEXT: jns .LBB0_2 ; BURR-NEXT: # %bb.1: -; BURR-NEXT: movl %r8d, %r13d +; BURR-NEXT: movl %r8d, %ebx ; BURR-NEXT: .LBB0_2: ; BURR-NEXT: je .LBB0_4 ; BURR-NEXT: # %bb.3: -; BURR-NEXT: movl %r13d, %esi +; BURR-NEXT: movl %ebx, %esi ; BURR-NEXT: .LBB0_4: -; BURR-NEXT: cmovsq %r10, %r15 -; BURR-NEXT: cmovnsq %r12, %rbp -; BURR-NEXT: cmoveq %r10, %rbp -; BURR-NEXT: cmovnsq %r14, %rdx -; BURR-NEXT: cmoveq %r10, %rdx -; BURR-NEXT: cmovsq %r10, %r11 +; BURR-NEXT: cmovsq %r14, %r13 +; BURR-NEXT: cmovnsq %r12, %r11 +; BURR-NEXT: cmoveq %r14, %r11 +; BURR-NEXT: cmovnsq %r15, %rdx +; BURR-NEXT: cmoveq %r14, %rdx +; BURR-NEXT: cmovsq %r14, %r10 ; BURR-NEXT: testb $1, %sil ; BURR-NEXT: cmovneq %rax, %rdx ; BURR-NEXT: movq %rdx, 24(%rax) -; BURR-NEXT: cmovneq %rax, %rbp -; BURR-NEXT: movq %rbp, 16(%rax) -; BURR-NEXT: cmovneq %rax, %r15 -; BURR-NEXT: movq %r15, 8(%rax) ; BURR-NEXT: cmovneq %rax, %r11 -; BURR-NEXT: movq %r11, (%rax) +; BURR-NEXT: movq %r11, 16(%rax) +; BURR-NEXT: cmovneq %rax, %r13 +; BURR-NEXT: movq %r13, 8(%rax) +; BURR-NEXT: cmovneq %rax, %r10 +; BURR-NEXT: movq %r10, (%rax) ; BURR-NEXT: popq %rbx ; BURR-NEXT: popq %r12 ; BURR-NEXT: popq %r13 ; BURR-NEXT: popq %r14 ; BURR-NEXT: popq %r15 -; BURR-NEXT: popq %rbp ; BURR-NEXT: retq ; ; SRC-LABEL: test1: @@ -293,14 +289,13 @@ ; SRC-NEXT: leal 1(%rsi,%rsi), %r11d ; SRC-NEXT: movb $-128, %r10b ; SRC-NEXT: subb %r11b, %r10b -; SRC-NEXT: movq %r9, %r12 +; SRC-NEXT: movq %r9, %r15 ; SRC-NEXT: movl %r10d, %ecx -; SRC-NEXT: shlq %cl, %r12 +; SRC-NEXT: shlq %cl, %r15 ; SRC-NEXT: movq %rsi, %rbp ; SRC-NEXT: movl %r11d, %ecx ; SRC-NEXT: shrdq %cl, %rdx, %rbp ; SRC-NEXT: shrq %cl, %rdx -; SRC-NEXT: xorl %r15d, %r15d ; SRC-NEXT: movl $1, %edi ; SRC-NEXT: xorl %r14d, %r14d ; SRC-NEXT: shldq %cl, %rdi, %r14 @@ -308,18 +303,19 @@ ; SRC-NEXT: shldq %cl, %r13, %r13 ; SRC-NEXT: movl $1, %ebx ; SRC-NEXT: shlq %cl, %rbx +; SRC-NEXT: xorl %r12d, %r12d ; SRC-NEXT: testb $64, %r11b ; SRC-NEXT: cmoveq %rbp, %rdx ; SRC-NEXT: cmovneq %rbx, %r14 -; SRC-NEXT: cmovneq %r15, %rbx -; SRC-NEXT: cmovneq %r15, %r13 +; SRC-NEXT: cmovneq %r12, %rbx +; SRC-NEXT: cmovneq %r12, %r13 ; SRC-NEXT: movl $1, %ebp ; SRC-NEXT: movl %r10d, %ecx -; SRC-NEXT: shrdq %cl, %r15, %rbp +; SRC-NEXT: shrdq %cl, %r12, %rbp ; SRC-NEXT: testb $64, %r10b -; SRC-NEXT: cmovneq %r15, %r12 -; SRC-NEXT: cmovneq %r15, %rbp -; SRC-NEXT: orl %edx, %r12d +; SRC-NEXT: cmovneq %r12, %r15 +; SRC-NEXT: cmovneq %r12, %rbp +; SRC-NEXT: orl %edx, %r15d ; SRC-NEXT: movl %r11d, %ecx ; SRC-NEXT: addb $-128, %cl ; SRC-NEXT: shrdq %cl, %r8, %r9 @@ -330,22 +326,22 @@ ; SRC-NEXT: testb $64, %cl ; SRC-NEXT: cmoveq %r9, %r8 ; SRC-NEXT: cmovneq %rdi, %rdx -; SRC-NEXT: cmovneq %r15, %rdi +; SRC-NEXT: cmovneq 
%r12, %rdi ; SRC-NEXT: testb %r11b, %r11b ; SRC-NEXT: jns .LBB0_2 ; SRC-NEXT: # %bb.1: -; SRC-NEXT: movl %r8d, %r12d +; SRC-NEXT: movl %r8d, %r15d ; SRC-NEXT: .LBB0_2: ; SRC-NEXT: je .LBB0_4 ; SRC-NEXT: # %bb.3: -; SRC-NEXT: movl %r12d, %esi +; SRC-NEXT: movl %r15d, %esi ; SRC-NEXT: .LBB0_4: ; SRC-NEXT: cmovnsq %r13, %rdx -; SRC-NEXT: cmoveq %r15, %rdx +; SRC-NEXT: cmoveq %r12, %rdx ; SRC-NEXT: cmovnsq %rbp, %rdi -; SRC-NEXT: cmoveq %r15, %rdi -; SRC-NEXT: cmovsq %r15, %r14 -; SRC-NEXT: cmovsq %r15, %rbx +; SRC-NEXT: cmoveq %r12, %rdi +; SRC-NEXT: cmovsq %r12, %r14 +; SRC-NEXT: cmovsq %r12, %rbx ; SRC-NEXT: testb $1, %sil ; SRC-NEXT: cmovneq %rax, %rbx ; SRC-NEXT: cmovneq %rax, %r14 @@ -372,13 +368,13 @@ ; LIN-NEXT: pushq %rbx ; LIN-NEXT: movq %rcx, %r9 ; LIN-NEXT: movq %rdi, %rax -; LIN-NEXT: xorl %r15d, %r15d ; LIN-NEXT: movl $1, %r14d ; LIN-NEXT: addq $1, %rsi ; LIN-NEXT: leal 1(%rsi,%rsi), %ebp ; LIN-NEXT: movl $1, %r12d ; LIN-NEXT: movl %ebp, %ecx ; LIN-NEXT: shlq %cl, %r12 +; LIN-NEXT: xorl %r15d, %r15d ; LIN-NEXT: testb $64, %bpl ; LIN-NEXT: movq %r12, %rbx ; LIN-NEXT: cmovneq %r15, %rbx @@ -418,8 +414,8 @@ ; LIN-NEXT: testb $1, %sil ; LIN-NEXT: cmovneq %rax, %rbx ; LIN-NEXT: movq %rbx, (%rax) -; LIN-NEXT: xorl %edx, %edx ; LIN-NEXT: movl %ebp, %ecx +; LIN-NEXT: xorl %edx, %edx ; LIN-NEXT: shldq %cl, %r14, %rdx ; LIN-NEXT: cmovneq %r12, %rdx ; LIN-NEXT: cmovsq %r15, %rdx @@ -438,12 +434,12 @@ ; LIN-NEXT: cmoveq %r15, %rdi ; LIN-NEXT: cmovneq %rax, %rdi ; LIN-NEXT: movq %rdi, 16(%rax) -; LIN-NEXT: xorl %esi, %esi ; LIN-NEXT: movl %r10d, %ecx +; LIN-NEXT: xorl %esi, %esi ; LIN-NEXT: shldq %cl, %r14, %rsi ; LIN-NEXT: cmovneq %rdx, %rsi -; LIN-NEXT: xorl %edx, %edx ; LIN-NEXT: movl %ebp, %ecx +; LIN-NEXT: xorl %edx, %edx ; LIN-NEXT: shldq %cl, %rdx, %rdx ; LIN-NEXT: cmovneq %r15, %rdx ; LIN-NEXT: cmovsq %rsi, %rdx @@ -470,8 +466,8 @@ ; ILP-LABEL: test2: ; ILP: # %bb.0: ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %edi, %edi ; ILP-NEXT: movq %rsi, %r11 +; ILP-NEXT: xorl %edi, %edi ; ILP-NEXT: negq %r11 ; ILP-NEXT: movl $0, %r10d ; ILP-NEXT: sbbq %rdx, %r10 @@ -510,8 +506,8 @@ ; HYBRID-LABEL: test2: ; HYBRID: # %bb.0: ; HYBRID-NEXT: movq %rdi, %rax -; HYBRID-NEXT: xorl %r9d, %r9d ; HYBRID-NEXT: movq %rsi, %r11 +; HYBRID-NEXT: xorl %r9d, %r9d ; HYBRID-NEXT: negq %r11 ; HYBRID-NEXT: movl $0, %r10d ; HYBRID-NEXT: sbbq %rdx, %r10 @@ -550,8 +546,8 @@ ; BURR-LABEL: test2: ; BURR: # %bb.0: ; BURR-NEXT: movq %rdi, %rax -; BURR-NEXT: xorl %r9d, %r9d ; BURR-NEXT: movq %rsi, %r11 +; BURR-NEXT: xorl %r9d, %r9d ; BURR-NEXT: negq %r11 ; BURR-NEXT: movl $0, %r10d ; BURR-NEXT: sbbq %rdx, %r10 @@ -590,8 +586,8 @@ ; SRC-LABEL: test2: ; SRC: # %bb.0: ; SRC-NEXT: movq %rdi, %rax -; SRC-NEXT: xorl %edi, %edi ; SRC-NEXT: movq %rsi, %r11 +; SRC-NEXT: xorl %edi, %edi ; SRC-NEXT: negq %r11 ; SRC-NEXT: movl $0, %r10d ; SRC-NEXT: sbbq %rdx, %r10 @@ -676,8 +672,8 @@ ; ILP-LABEL: test3: ; ILP: # %bb.0: ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %r10d, %r10d ; ILP-NEXT: movq %rsi, %r9 +; ILP-NEXT: xorl %r10d, %r10d ; ILP-NEXT: negq %r9 ; ILP-NEXT: movl $0, %r11d ; ILP-NEXT: sbbq %rdx, %r11 @@ -721,8 +717,8 @@ ; HYBRID: # %bb.0: ; HYBRID-NEXT: pushq %rbx ; HYBRID-NEXT: movq %rdi, %rax -; HYBRID-NEXT: xorl %edi, %edi ; HYBRID-NEXT: movq %rsi, %r9 +; HYBRID-NEXT: xorl %edi, %edi ; HYBRID-NEXT: negq %r9 ; HYBRID-NEXT: movl $0, %r10d ; HYBRID-NEXT: sbbq %rdx, %r10 @@ -767,8 +763,8 @@ ; BURR: # %bb.0: ; BURR-NEXT: pushq %rbx ; BURR-NEXT: movq %rdi, %rax -; BURR-NEXT: xorl %edi, %edi ; BURR-NEXT: movq %rsi, %r9 +; 
BURR-NEXT: xorl %edi, %edi ; BURR-NEXT: negq %r9 ; BURR-NEXT: movl $0, %r10d ; BURR-NEXT: sbbq %rdx, %r10 @@ -1005,8 +1001,8 @@ ; ILP: # %bb.0: ; ILP-NEXT: pushq %rbx ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %r9d, %r9d ; ILP-NEXT: movq %rsi, %rbx +; ILP-NEXT: xorl %r9d, %r9d ; ILP-NEXT: negq %rbx ; ILP-NEXT: movl $0, %r11d ; ILP-NEXT: sbbq %rdx, %r11 @@ -1052,8 +1048,8 @@ ; HYBRID: # %bb.0: ; HYBRID-NEXT: pushq %rbx ; HYBRID-NEXT: movq %rdi, %rax -; HYBRID-NEXT: xorl %r9d, %r9d ; HYBRID-NEXT: movq %rsi, %rbx +; HYBRID-NEXT: xorl %r9d, %r9d ; HYBRID-NEXT: negq %rbx ; HYBRID-NEXT: movl $0, %r11d ; HYBRID-NEXT: sbbq %rdx, %r11 @@ -1099,8 +1095,8 @@ ; BURR: # %bb.0: ; BURR-NEXT: pushq %rbx ; BURR-NEXT: movq %rdi, %rax -; BURR-NEXT: xorl %r9d, %r9d ; BURR-NEXT: movq %rsi, %rbx +; BURR-NEXT: xorl %r9d, %r9d ; BURR-NEXT: negq %rbx ; BURR-NEXT: movl $0, %r11d ; BURR-NEXT: sbbq %rdx, %r11 @@ -1146,8 +1142,8 @@ ; SRC: # %bb.0: ; SRC-NEXT: pushq %rbx ; SRC-NEXT: movq %rdi, %rax -; SRC-NEXT: xorl %r9d, %r9d ; SRC-NEXT: movq %rsi, %rbx +; SRC-NEXT: xorl %r9d, %r9d ; SRC-NEXT: negq %rbx ; SRC-NEXT: movl $0, %r11d ; SRC-NEXT: sbbq %rdx, %r11 Index: llvm/test/CodeGen/X86/select.ll =================================================================== --- llvm/test/CodeGen/X86/select.ll +++ llvm/test/CodeGen/X86/select.ll @@ -204,8 +204,8 @@ ; MCU-NEXT: flds {{\.LCPI.*}} ; MCU-NEXT: fucompp ; MCU-NEXT: fnstsw %ax -; MCU-NEXT: xorl %edx, %edx ; MCU-NEXT: # kill: def $ah killed $ah killed $ax +; MCU-NEXT: xorl %edx, %edx ; MCU-NEXT: sahf ; MCU-NEXT: seta %dl ; MCU-NEXT: movb (%ecx,%edx,4), %al Index: llvm/test/CodeGen/X86/select_const.ll =================================================================== --- llvm/test/CodeGen/X86/select_const.ll +++ llvm/test/CodeGen/X86/select_const.ll @@ -485,8 +485,8 @@ ; CHECK-LABEL: opaque_constant: ; CHECK: # %bb.0: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: movq $-4, %rcx ; CHECK-NEXT: movl $23, %eax +; CHECK-NEXT: movq $-4, %rcx ; CHECK-NEXT: cmovneq %rcx, %rax ; CHECK-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001 ; CHECK-NEXT: andq %rcx, %rax Index: llvm/test/CodeGen/X86/shrink-compare.ll =================================================================== --- llvm/test/CodeGen/X86/shrink-compare.ll +++ llvm/test/CodeGen/X86/shrink-compare.ll @@ -68,8 +68,8 @@ define i1 @test4(i64 %a, i32 %b) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: je .LBB3_1 ; CHECK-NEXT: # %bb.2: # %lor.end ; CHECK-NEXT: # kill: def $al killed $al killed $eax Index: llvm/test/CodeGen/X86/shrink_vmul.ll =================================================================== --- llvm/test/CodeGen/X86/shrink_vmul.ll +++ llvm/test/CodeGen/X86/shrink_vmul.ll @@ -2357,23 +2357,23 @@ ; X86-AVX1-NEXT: movl %edx, %ebp ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; X86-AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: vpextrd $3, %xmm0, %eax +; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: divl %ecx ; X86-AVX1-NEXT: movl %edx, %ebx -; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: vpextrd $2, %xmm1, %esi ; X86-AVX1-NEXT: vpextrd $2, %xmm0, %eax +; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: divl %esi ; X86-AVX1-NEXT: movl %edx, %esi -; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: vpextrd $1, %xmm1, %edi ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %eax +; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: divl %edi ; X86-AVX1-NEXT: movl %edx, %edi -; 
X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: vmovd %xmm1, %ecx ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: xorl %edx, %edx ; X86-AVX1-NEXT: divl %ecx ; X86-AVX1-NEXT: vmovd %edx, %xmm0 ; X86-AVX1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 Index: llvm/test/CodeGen/X86/sink-hoist.ll =================================================================== --- llvm/test/CodeGen/X86/sink-hoist.ll +++ llvm/test/CodeGen/X86/sink-hoist.ll @@ -151,7 +151,7 @@ ; a load from a zero-extending load for hoisting. ; CHECK-LABEL: default_get_pch_validity: -; CHECK: movl cl_options_count(%rip), %ecx +; CHECK: movl cl_options_count(%rip), %eax @cl_options_count = external constant i32 ; [#uses=2] Index: llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening-call-and-ret.ll @@ -15,8 +15,8 @@ ; X64-NOPIC-NEXT: pushq %rbx ; X64-NOPIC-NEXT: movq %rsp, %rax ; X64-NOPIC-NEXT: movq %rdi, %rbx -; X64-NOPIC-NEXT: movq $-1, %r14 ; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: movq $-1, %r14 ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: orq %rax, %rsp ; X64-NOPIC-NEXT: callq f @@ -53,8 +53,8 @@ ; X64-NOPIC-MCM-NEXT: pushq %rbx ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx -; X64-NOPIC-MCM-NEXT: movq $-1, %r14 ; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: movq $-1, %r14 ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp ; X64-NOPIC-MCM-NEXT: callq f @@ -62,7 +62,7 @@ ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NOPIC-MCM-NEXT: sarq $63, %rax -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr0(%rip), %rdx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rdx ; X64-NOPIC-MCM-NEXT: cmpq %rdx, %rcx ; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rax ; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp @@ -73,7 +73,7 @@ ; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx ; X64-NOPIC-MCM-NEXT: movq -{{[0-9]+}}(%rsp), %rax ; X64-NOPIC-MCM-NEXT: sarq $63, %rcx -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr1(%rip), %rdx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rdx ; X64-NOPIC-MCM-NEXT: cmpq %rdx, %rax ; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rcx ; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp @@ -93,8 +93,8 @@ ; X64-PIC-NEXT: pushq %rbx ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: movq %rdi, %rbx -; X64-PIC-NEXT: movq $-1, %r14 ; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: movq $-1, %r14 ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: orq %rax, %rsp ; X64-PIC-NEXT: callq f@PLT @@ -102,7 +102,7 @@ ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: leaq .Lslh_ret_addr0(%rip), %rdx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdx ; X64-PIC-NEXT: cmpq %rdx, %rcx ; X64-PIC-NEXT: cmovneq %r14, %rax ; X64-PIC-NEXT: movl (%rbx), %ebp @@ -113,7 +113,7 @@ ; X64-PIC-NEXT: movq %rsp, %rcx ; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rax ; X64-PIC-NEXT: sarq $63, %rcx -; X64-PIC-NEXT: leaq .Lslh_ret_addr1(%rip), %rdx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdx ; X64-PIC-NEXT: cmpq %rdx, %rax ; X64-PIC-NEXT: cmovneq %r14, %rcx ; X64-PIC-NEXT: addl (%rbx), %ebp @@ -144,8 +144,8 @@ ; X64-NOPIC-NEXT: pushq %rax ; X64-NOPIC-NEXT: movq %rsp, %rax ; X64-NOPIC-NEXT: movq %rdi, %rbx -; X64-NOPIC-NEXT: movq $-1, %r14 ; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: movq $-1, %r14 ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: 
orq %rax, %rsp ; X64-NOPIC-NEXT: movq $.Lslh_ret_addr2, %rbp @@ -186,27 +186,27 @@ ; X64-NOPIC-MCM-NEXT: pushq %rax ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx -; X64-NOPIC-MCM-NEXT: movq $-1, %r14 ; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: movq $-1, %r14 ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr2(%rip), %rbp +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rbp ; X64-NOPIC-MCM-NEXT: callq f ; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr2: ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: sarq $63, %rax -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr2(%rip), %rcx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rcx ; X64-NOPIC-MCM-NEXT: cmpq %rcx, %rbp ; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rax ; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr3(%rip), %r15 +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %r15 ; X64-NOPIC-MCM-NEXT: callq f ; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr3: ; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx ; X64-NOPIC-MCM-NEXT: sarq $63, %rcx -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr3(%rip), %rax +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rax ; X64-NOPIC-MCM-NEXT: cmpq %rax, %r15 ; X64-NOPIC-MCM-NEXT: cmovneq %r14, %rcx ; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp @@ -230,27 +230,27 @@ ; X64-PIC-NEXT: pushq %rax ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: movq %rdi, %rbx -; X64-PIC-NEXT: movq $-1, %r14 ; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: movq $-1, %r14 ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: orq %rax, %rsp -; X64-PIC-NEXT: leaq .Lslh_ret_addr2(%rip), %rbp +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rbp ; X64-PIC-NEXT: callq f@PLT ; X64-PIC-NEXT: .Lslh_ret_addr2: ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: leaq .Lslh_ret_addr2(%rip), %rcx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; X64-PIC-NEXT: cmpq %rcx, %rbp ; X64-PIC-NEXT: cmovneq %r14, %rax ; X64-PIC-NEXT: movl (%rbx), %ebp ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: orq %rax, %rsp -; X64-PIC-NEXT: leaq .Lslh_ret_addr3(%rip), %r15 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %r15 ; X64-PIC-NEXT: callq f@PLT ; X64-PIC-NEXT: .Lslh_ret_addr3: ; X64-PIC-NEXT: movq %rsp, %rcx ; X64-PIC-NEXT: sarq $63, %rcx -; X64-PIC-NEXT: leaq .Lslh_ret_addr3(%rip), %rax +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; X64-PIC-NEXT: cmpq %rax, %r15 ; X64-PIC-NEXT: cmovneq %r14, %rcx ; X64-PIC-NEXT: addl (%rbx), %ebp @@ -288,8 +288,8 @@ ; X64-NOPIC-NEXT: subq $16, %rsp ; X64-NOPIC-NEXT: movq %rsp, %rax ; X64-NOPIC-NEXT: movq %rdi, %rbx -; X64-NOPIC-NEXT: movq $-1, %r15 ; X64-NOPIC-NEXT: sarq $63, %rax +; X64-NOPIC-NEXT: movq $-1, %r15 ; X64-NOPIC-NEXT: movq %rsp, %r14 ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: movq %r14, %rdi @@ -302,9 +302,9 @@ ; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr4, %rbp ; X64-NOPIC-NEXT: cmovneq %r15, %rax ; X64-NOPIC-NEXT: movl (%rbx), %ebp -; X64-NOPIC-NEXT: movl $42, %esi ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: movq %r14, %rdi +; X64-NOPIC-NEXT: movl $42, %esi ; X64-NOPIC-NEXT: orq %rax, %rsp ; X64-NOPIC-NEXT: movq $.Lslh_ret_addr5, %r12 ; X64-NOPIC-NEXT: callq sigsetjmp @@ -314,10 +314,10 @@ ; X64-NOPIC-NEXT: cmpq $.Lslh_ret_addr5, %r12 ; X64-NOPIC-NEXT: cmovneq %r15, %rax ; X64-NOPIC-NEXT: addl (%rbx), %ebp -; X64-NOPIC-NEXT: movl $42, %edx ; X64-NOPIC-NEXT: shlq $47, %rax ; X64-NOPIC-NEXT: movq %r14, %rdi ; X64-NOPIC-NEXT: movq %r14, %rsi +; X64-NOPIC-NEXT: movl $42, 
%edx ; X64-NOPIC-NEXT: orq %rax, %rsp ; X64-NOPIC-NEXT: movq $.Lslh_ret_addr6, %r14 ; X64-NOPIC-NEXT: callq __sigsetjmp @@ -349,45 +349,45 @@ ; X64-NOPIC-MCM-NEXT: subq $16, %rsp ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: movq %rdi, %rbx -; X64-NOPIC-MCM-NEXT: movq $-1, %r15 ; X64-NOPIC-MCM-NEXT: sarq $63, %rax +; X64-NOPIC-MCM-NEXT: movq $-1, %r15 ; X64-NOPIC-MCM-NEXT: movq %rsp, %r14 ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: movq %r14, %rdi ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr4(%rip), %rbp +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rbp ; X64-NOPIC-MCM-NEXT: callq setjmp ; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr4: ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: sarq $63, %rax -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr4(%rip), %rcx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rcx ; X64-NOPIC-MCM-NEXT: cmpq %rcx, %rbp ; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rax ; X64-NOPIC-MCM-NEXT: movl (%rbx), %ebp -; X64-NOPIC-MCM-NEXT: movl $42, %esi ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: movq %r14, %rdi +; X64-NOPIC-MCM-NEXT: movl $42, %esi ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %r12 +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %r12 ; X64-NOPIC-MCM-NEXT: callq sigsetjmp ; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr5: ; X64-NOPIC-MCM-NEXT: movq %rsp, %rax ; X64-NOPIC-MCM-NEXT: sarq $63, %rax -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr5(%rip), %rcx +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rcx ; X64-NOPIC-MCM-NEXT: cmpq %rcx, %r12 ; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rax ; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp -; X64-NOPIC-MCM-NEXT: movl $42, %edx ; X64-NOPIC-MCM-NEXT: shlq $47, %rax ; X64-NOPIC-MCM-NEXT: movq %r14, %rdi ; X64-NOPIC-MCM-NEXT: movq %r14, %rsi +; X64-NOPIC-MCM-NEXT: movl $42, %edx ; X64-NOPIC-MCM-NEXT: orq %rax, %rsp -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr6(%rip), %r14 +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %r14 ; X64-NOPIC-MCM-NEXT: callq __sigsetjmp ; X64-NOPIC-MCM-NEXT: .Lslh_ret_addr6: ; X64-NOPIC-MCM-NEXT: movq %rsp, %rcx ; X64-NOPIC-MCM-NEXT: sarq $63, %rcx -; X64-NOPIC-MCM-NEXT: leaq .Lslh_ret_addr6(%rip), %rax +; X64-NOPIC-MCM-NEXT: leaq {{.*}}(%rip), %rax ; X64-NOPIC-MCM-NEXT: cmpq %rax, %r14 ; X64-NOPIC-MCM-NEXT: cmovneq %r15, %rcx ; X64-NOPIC-MCM-NEXT: addl (%rbx), %ebp @@ -413,45 +413,45 @@ ; X64-PIC-NEXT: subq $16, %rsp ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: movq %rdi, %rbx -; X64-PIC-NEXT: movq $-1, %r15 ; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: movq $-1, %r15 ; X64-PIC-NEXT: movq %rsp, %r14 ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: movq %r14, %rdi ; X64-PIC-NEXT: orq %rax, %rsp -; X64-PIC-NEXT: leaq .Lslh_ret_addr4(%rip), %rbp +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rbp ; X64-PIC-NEXT: callq setjmp@PLT ; X64-PIC-NEXT: .Lslh_ret_addr4: ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: leaq .Lslh_ret_addr4(%rip), %rcx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; X64-PIC-NEXT: cmpq %rcx, %rbp ; X64-PIC-NEXT: cmovneq %r15, %rax ; X64-PIC-NEXT: movl (%rbx), %ebp -; X64-PIC-NEXT: movl $42, %esi ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: movq %r14, %rdi +; X64-PIC-NEXT: movl $42, %esi ; X64-PIC-NEXT: orq %rax, %rsp -; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %r12 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %r12 ; X64-PIC-NEXT: callq sigsetjmp@PLT ; X64-PIC-NEXT: .Lslh_ret_addr5: ; X64-PIC-NEXT: movq %rsp, %rax ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: leaq .Lslh_ret_addr5(%rip), %rcx +; 
X64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; X64-PIC-NEXT: cmpq %rcx, %r12 ; X64-PIC-NEXT: cmovneq %r15, %rax ; X64-PIC-NEXT: addl (%rbx), %ebp -; X64-PIC-NEXT: movl $42, %edx ; X64-PIC-NEXT: shlq $47, %rax ; X64-PIC-NEXT: movq %r14, %rdi ; X64-PIC-NEXT: movq %r14, %rsi +; X64-PIC-NEXT: movl $42, %edx ; X64-PIC-NEXT: orq %rax, %rsp -; X64-PIC-NEXT: leaq .Lslh_ret_addr6(%rip), %r14 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %r14 ; X64-PIC-NEXT: callq __sigsetjmp@PLT ; X64-PIC-NEXT: .Lslh_ret_addr6: ; X64-PIC-NEXT: movq %rsp, %rcx ; X64-PIC-NEXT: sarq $63, %rcx -; X64-PIC-NEXT: leaq .Lslh_ret_addr6(%rip), %rax +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rax ; X64-PIC-NEXT: cmpq %rax, %r14 ; X64-PIC-NEXT: cmovneq %r15, %rcx ; X64-PIC-NEXT: addl (%rbx), %ebp Index: llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll @@ -7,7 +7,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -16,6 +15,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vgatherdps %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -30,7 +30,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -39,6 +38,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vgatherqps %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -53,7 +53,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -62,6 +61,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -76,7 +76,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -85,6 +84,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -99,7 +99,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -108,6 +107,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0 ; CHECK-NEXT: vgatherdps %ymm1, (%rdi,%ymm0), %ymm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -122,7 +122,6 @@ ; CHECK-LABEL: 
test_llvm_x86_avx2_gather_q_ps_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -131,6 +130,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0 ; CHECK-NEXT: vgatherqps %xmm1, (%rdi,%ymm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: vzeroupper @@ -146,7 +146,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -155,6 +154,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -169,7 +169,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -178,6 +177,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0 ; CHECK-NEXT: vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -192,7 +192,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -201,6 +200,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -215,7 +215,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -224,6 +223,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -238,7 +238,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -247,6 +246,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -261,7 +261,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -270,6 +269,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -284,7 +284,6 @@ ; CHECK-LABEL: 
test_llvm_x86_avx2_gather_d_d_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -293,6 +292,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0 ; CHECK-NEXT: vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -307,7 +307,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -316,6 +315,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0 ; CHECK-NEXT: vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm2, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: vzeroupper @@ -331,7 +331,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -340,6 +339,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -354,7 +354,6 @@ ; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-NEXT: orq %rax, %rdi @@ -363,6 +362,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm3, %ymm0 ; CHECK-NEXT: vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2 ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm2, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -377,7 +377,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -386,6 +385,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -400,7 +400,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -410,6 +409,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vgatherdpd (%rdi,%ymm0), %zmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -424,7 +424,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -433,6 +432,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq 
@@ -447,7 +447,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -456,6 +455,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vgatherqpd (%rdi,%zmm0), %zmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -470,7 +470,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -479,6 +478,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -493,7 +493,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -503,6 +502,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vpgatherdq (%rdi,%ymm0), %zmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -518,7 +518,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -527,6 +526,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpgatherqd (%rdi,%zmm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -541,7 +541,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -550,6 +549,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpgatherqq (%rdi,%zmm0), %zmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -564,7 +564,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: orq %rax, %rdi @@ -572,6 +571,7 @@ ; CHECK-NEXT: vporq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -586,7 +586,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -595,6 +594,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vgatherdps (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; 
CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -609,7 +609,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -618,6 +617,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vgatherqps (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -632,7 +632,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -641,6 +640,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -655,7 +655,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -664,6 +663,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -678,7 +678,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -687,6 +686,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vgatherdps (%rdi,%ymm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -701,7 +701,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -710,6 +709,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vgatherqps (%rdi,%ymm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: vzeroupper @@ -725,7 +725,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -734,6 +733,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -748,7 +748,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -757,6 +756,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; 
CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -771,7 +771,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -780,6 +779,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -794,7 +794,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -803,6 +802,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -817,7 +817,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -826,6 +825,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -840,7 +840,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -849,6 +848,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vpgatherqq (%rdi,%xmm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -863,7 +863,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -872,6 +871,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -886,7 +886,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -895,6 +894,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm0), %xmm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: vzeroupper @@ -910,7 +910,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -919,6 +918,7 @@ ; CHECK-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: vpgatherdq 
(%rdi,%xmm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq @@ -933,7 +933,6 @@ ; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: sarq $63, %rax ; CHECK-NEXT: kxnorw %k0, %k0, %k1 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -942,6 +941,7 @@ ; CHECK-NEXT: vpor %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm0), %ymm1 {%k1} ; CHECK-NEXT: shlq $47, %rax +; CHECK-NEXT: movq $-1, %rcx ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: orq %rax, %rsp ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll @@ -19,8 +19,8 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: movq (%rdi), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax @@ -41,8 +41,8 @@ ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: pushq %rbx ; X64-PIC-NEXT: movq %rsp, %rax -; X64-PIC-NEXT: movq $-1, %rbx ; X64-PIC-NEXT: sarq $63, %rax +; X64-PIC-NEXT: movq $-1, %rbx ; X64-PIC-NEXT: movq (%rdi), %rcx ; X64-PIC-NEXT: orq %rax, %rcx ; X64-PIC-NEXT: shlq $47, %rax @@ -52,7 +52,7 @@ ; X64-PIC-NEXT: movq %rsp, %rcx ; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-PIC-NEXT: sarq $63, %rcx -; X64-PIC-NEXT: leaq .Lslh_ret_addr0(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rbx, %rcx ; X64-PIC-NEXT: shlq $47, %rcx @@ -64,10 +64,10 @@ ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: pushq %rbx ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: sarq $63, %rax ; X64-RETPOLINE-NEXT: movq (%rdi), %r11 ; X64-RETPOLINE-NEXT: orq %rax, %r11 +; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: shlq $47, %rax ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11 @@ -91,33 +91,33 @@ ; X64-LABEL: test_indirect_tail_call: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movq (%rdi), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax +; X64-NEXT: movq $-1, %rdx ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: jmpq *%rcx # TAILCALL ; ; X64-PIC-LABEL: test_indirect_tail_call: ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: movq %rsp, %rax -; X64-PIC-NEXT: movq $-1, %rcx ; X64-PIC-NEXT: sarq $63, %rax ; X64-PIC-NEXT: movq (%rdi), %rcx ; X64-PIC-NEXT: orq %rax, %rcx ; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: movq $-1, %rdx ; X64-PIC-NEXT: orq %rax, %rsp ; X64-PIC-NEXT: jmpq *%rcx # TAILCALL ; ; X64-RETPOLINE-LABEL: test_indirect_tail_call: ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rcx ; X64-RETPOLINE-NEXT: sarq $63, %rax ; X64-RETPOLINE-NEXT: movq (%rdi), %r11 ; X64-RETPOLINE-NEXT: orq %rax, %r11 ; X64-RETPOLINE-NEXT: shlq $47, %rax +; X64-RETPOLINE-NEXT: movq $-1, %rcx ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: jmp __llvm_retpoline_r11 # TAILCALL entry: @@ -131,9 +131,9 @@ ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rbx ; X64-NEXT: 
sarq $63, %rax -; X64-NEXT: movq global_fnptr(%rip), %rcx +; X64-NEXT: movq $-1, %rbx +; X64-NEXT: movq {{.*}}(%rip), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp @@ -153,9 +153,9 @@ ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: pushq %rbx ; X64-PIC-NEXT: movq %rsp, %rax -; X64-PIC-NEXT: movq $-1, %rbx ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: movq global_fnptr@GOTPCREL(%rip), %rcx +; X64-PIC-NEXT: movq global_fnptr@{{.*}}(%rip), %rcx +; X64-PIC-NEXT: movq $-1, %rbx ; X64-PIC-NEXT: movq (%rcx), %rcx ; X64-PIC-NEXT: orq %rax, %rcx ; X64-PIC-NEXT: shlq $47, %rax @@ -165,7 +165,7 @@ ; X64-PIC-NEXT: movq %rsp, %rcx ; X64-PIC-NEXT: movq -{{[0-9]+}}(%rsp), %rdx ; X64-PIC-NEXT: sarq $63, %rcx -; X64-PIC-NEXT: leaq .Lslh_ret_addr1(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rbx, %rcx ; X64-PIC-NEXT: shlq $47, %rcx @@ -177,9 +177,9 @@ ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: pushq %rbx ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: sarq $63, %rax -; X64-RETPOLINE-NEXT: movq global_fnptr(%rip), %r11 +; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11 +; X64-RETPOLINE-NEXT: movq $-1, %rbx ; X64-RETPOLINE-NEXT: shlq $47, %rax ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: callq __llvm_retpoline_r11 @@ -203,33 +203,33 @@ ; X64-LABEL: test_indirect_tail_call_global: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rcx ; X64-NEXT: sarq $63, %rax -; X64-NEXT: movq global_fnptr(%rip), %rcx +; X64-NEXT: movq {{.*}}(%rip), %rcx ; X64-NEXT: orq %rax, %rcx ; X64-NEXT: shlq $47, %rax +; X64-NEXT: movq $-1, %rdx ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: jmpq *%rcx # TAILCALL ; ; X64-PIC-LABEL: test_indirect_tail_call_global: ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: movq %rsp, %rax -; X64-PIC-NEXT: movq $-1, %rcx ; X64-PIC-NEXT: sarq $63, %rax -; X64-PIC-NEXT: movq global_fnptr@GOTPCREL(%rip), %rcx +; X64-PIC-NEXT: movq global_fnptr@{{.*}}(%rip), %rcx ; X64-PIC-NEXT: movq (%rcx), %rcx ; X64-PIC-NEXT: orq %rax, %rcx ; X64-PIC-NEXT: shlq $47, %rax +; X64-PIC-NEXT: movq $-1, %rdx ; X64-PIC-NEXT: orq %rax, %rsp ; X64-PIC-NEXT: jmpq *%rcx # TAILCALL ; ; X64-RETPOLINE-LABEL: test_indirect_tail_call_global: ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: movq %rsp, %rax -; X64-RETPOLINE-NEXT: movq $-1, %rcx ; X64-RETPOLINE-NEXT: sarq $63, %rax -; X64-RETPOLINE-NEXT: movq global_fnptr(%rip), %r11 +; X64-RETPOLINE-NEXT: movq {{.*}}(%rip), %r11 ; X64-RETPOLINE-NEXT: shlq $47, %rax +; X64-RETPOLINE-NEXT: movq $-1, %rcx ; X64-RETPOLINE-NEXT: orq %rax, %rsp ; X64-RETPOLINE-NEXT: jmp __llvm_retpoline_r11 # TAILCALL entry: @@ -242,68 +242,78 @@ ; X64-LABEL: test_indirectbr: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %rcx -; X64-NEXT: movq $-1, %rax ; X64-NEXT: sarq $63, %rcx ; X64-NEXT: movq (%rdi), %rdx ; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: movq $-1, %rax ; X64-NEXT: jmpq *%rdx ; X64-NEXT: .LBB4_1: # %bb0 ; X64-NEXT: cmpq $.LBB4_1, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $2, %eax -; X64-NEXT: jmp .LBB4_2 -; X64-NEXT: .LBB4_4: # %bb2 -; X64-NEXT: cmpq $.LBB4_4, %rdx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq +; X64-NEXT: .LBB4_3: # %bb2 +; X64-NEXT: cmpq $.LBB4_3, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $13, %eax -; X64-NEXT: jmp .LBB4_2 -; X64-NEXT: .LBB4_5: # %bb3 -; X64-NEXT: cmpq $.LBB4_5, %rdx +; X64-NEXT: orq 
%rcx, %rsp +; X64-NEXT: retq +; X64-NEXT: .LBB4_4: # %bb3 +; X64-NEXT: cmpq $.LBB4_4, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $42, %eax -; X64-NEXT: jmp .LBB4_2 -; X64-NEXT: .LBB4_3: # %bb1 -; X64-NEXT: cmpq $.LBB4_3, %rdx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq +; X64-NEXT: .LBB4_2: # %bb1 +; X64-NEXT: cmpq $.LBB4_2, %rdx ; X64-NEXT: cmovneq %rax, %rcx -; X64-NEXT: movl $7, %eax -; X64-NEXT: .LBB4_2: # %bb0 ; X64-NEXT: shlq $47, %rcx +; X64-NEXT: movl $7, %eax ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: retq ; ; X64-PIC-LABEL: test_indirectbr: ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: movq %rsp, %rcx -; X64-PIC-NEXT: movq $-1, %rax ; X64-PIC-NEXT: sarq $63, %rcx ; X64-PIC-NEXT: movq (%rdi), %rdx ; X64-PIC-NEXT: orq %rcx, %rdx +; X64-PIC-NEXT: movq $-1, %rax ; X64-PIC-NEXT: jmpq *%rdx ; X64-PIC-NEXT: .LBB4_1: # %bb0 -; X64-PIC-NEXT: leaq .LBB4_1(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $2, %eax -; X64-PIC-NEXT: jmp .LBB4_2 -; X64-PIC-NEXT: .LBB4_4: # %bb2 -; X64-PIC-NEXT: leaq .LBB4_4(%rip), %rsi +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq +; X64-PIC-NEXT: .LBB4_3: # %bb2 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $13, %eax -; X64-PIC-NEXT: jmp .LBB4_2 -; X64-PIC-NEXT: .LBB4_5: # %bb3 -; X64-PIC-NEXT: leaq .LBB4_5(%rip), %rsi +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq +; X64-PIC-NEXT: .LBB4_4: # %bb3 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $42, %eax -; X64-PIC-NEXT: jmp .LBB4_2 -; X64-PIC-NEXT: .LBB4_3: # %bb1 -; X64-PIC-NEXT: leaq .LBB4_3(%rip), %rsi +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq +; X64-PIC-NEXT: .LBB4_2: # %bb1 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx -; X64-PIC-NEXT: movl $7, %eax -; X64-PIC-NEXT: .LBB4_2: # %bb0 ; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl $7, %eax ; X64-PIC-NEXT: orq %rcx, %rsp ; X64-PIC-NEXT: retq ; @@ -330,121 +340,136 @@ ; X64-LABEL: test_indirectbr_global: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %rcx -; X64-NEXT: movq $-1, %rax ; X64-NEXT: sarq $63, %rcx -; X64-NEXT: movslq %edi, %rdx -; X64-NEXT: movq global_blockaddrs(,%rdx,8), %rdx +; X64-NEXT: movslq %edi, %rax +; X64-NEXT: movq global_blockaddrs(,%rax,8), %rdx ; X64-NEXT: orq %rcx, %rdx +; X64-NEXT: movq $-1, %rax ; X64-NEXT: jmpq *%rdx ; X64-NEXT: .Ltmp0: # Block address taken ; X64-NEXT: .LBB5_1: # %bb0 ; X64-NEXT: cmpq $.LBB5_1, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $2, %eax -; X64-NEXT: jmp .LBB5_2 +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq ; X64-NEXT: .Ltmp1: # Block address taken -; X64-NEXT: .LBB5_4: # %bb2 -; X64-NEXT: cmpq $.LBB5_4, %rdx +; X64-NEXT: .LBB5_3: # %bb2 +; X64-NEXT: cmpq $.LBB5_3, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $13, %eax -; X64-NEXT: jmp .LBB5_2 +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq ; X64-NEXT: .Ltmp2: # Block address taken -; X64-NEXT: .LBB5_5: # %bb3 -; X64-NEXT: cmpq $.LBB5_5, %rdx +; X64-NEXT: .LBB5_4: # %bb3 +; X64-NEXT: cmpq $.LBB5_4, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $42, %eax -; X64-NEXT: jmp 
.LBB5_2 +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq ; X64-NEXT: .Ltmp3: # Block address taken -; X64-NEXT: .LBB5_3: # %bb1 -; X64-NEXT: cmpq $.LBB5_3, %rdx +; X64-NEXT: .LBB5_2: # %bb1 +; X64-NEXT: cmpq $.LBB5_2, %rdx ; X64-NEXT: cmovneq %rax, %rcx -; X64-NEXT: movl $7, %eax -; X64-NEXT: .LBB5_2: # %bb0 ; X64-NEXT: shlq $47, %rcx +; X64-NEXT: movl $7, %eax ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: retq ; ; X64-PIC-LABEL: test_indirectbr_global: ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: movq %rsp, %rcx -; X64-PIC-NEXT: movq $-1, %rax ; X64-PIC-NEXT: sarq $63, %rcx -; X64-PIC-NEXT: movslq %edi, %rdx -; X64-PIC-NEXT: movq global_blockaddrs@GOTPCREL(%rip), %rsi -; X64-PIC-NEXT: movq (%rsi,%rdx,8), %rdx +; X64-PIC-NEXT: movslq %edi, %rax +; X64-PIC-NEXT: movq global_blockaddrs@{{.*}}(%rip), %rdx +; X64-PIC-NEXT: movq (%rdx,%rax,8), %rdx ; X64-PIC-NEXT: orq %rcx, %rdx +; X64-PIC-NEXT: movq $-1, %rax ; X64-PIC-NEXT: jmpq *%rdx ; X64-PIC-NEXT: .Ltmp0: # Block address taken ; X64-PIC-NEXT: .LBB5_1: # %bb0 -; X64-PIC-NEXT: leaq .LBB5_1(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $2, %eax -; X64-PIC-NEXT: jmp .LBB5_2 +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq ; X64-PIC-NEXT: .Ltmp1: # Block address taken -; X64-PIC-NEXT: .LBB5_4: # %bb2 -; X64-PIC-NEXT: leaq .LBB5_4(%rip), %rsi +; X64-PIC-NEXT: .LBB5_3: # %bb2 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $13, %eax -; X64-PIC-NEXT: jmp .LBB5_2 +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq ; X64-PIC-NEXT: .Ltmp2: # Block address taken -; X64-PIC-NEXT: .LBB5_5: # %bb3 -; X64-PIC-NEXT: leaq .LBB5_5(%rip), %rsi +; X64-PIC-NEXT: .LBB5_4: # %bb3 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $42, %eax -; X64-PIC-NEXT: jmp .LBB5_2 +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq ; X64-PIC-NEXT: .Ltmp3: # Block address taken -; X64-PIC-NEXT: .LBB5_3: # %bb1 -; X64-PIC-NEXT: leaq .LBB5_3(%rip), %rsi +; X64-PIC-NEXT: .LBB5_2: # %bb1 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx -; X64-PIC-NEXT: movl $7, %eax -; X64-PIC-NEXT: .LBB5_2: # %bb0 ; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl $7, %eax ; X64-PIC-NEXT: orq %rcx, %rsp ; X64-PIC-NEXT: retq ; ; X64-RETPOLINE-LABEL: test_indirectbr_global: ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: movq %rsp, %rcx -; X64-RETPOLINE-NEXT: movq $-1, %rax ; X64-RETPOLINE-NEXT: sarq $63, %rcx -; X64-RETPOLINE-NEXT: movslq %edi, %rdx -; X64-RETPOLINE-NEXT: movq global_blockaddrs(,%rdx,8), %rdx +; X64-RETPOLINE-NEXT: movslq %edi, %rax +; X64-RETPOLINE-NEXT: movq global_blockaddrs(,%rax,8), %rdx ; X64-RETPOLINE-NEXT: orq %rcx, %rdx ; X64-RETPOLINE-NEXT: cmpq $2, %rdx -; X64-RETPOLINE-NEXT: je .LBB6_5 +; X64-RETPOLINE-NEXT: movq $-1, %rax +; X64-RETPOLINE-NEXT: je .LBB6_4 ; X64-RETPOLINE-NEXT: # %bb.1: # %entry ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx ; X64-RETPOLINE-NEXT: cmpq $3, %rdx -; X64-RETPOLINE-NEXT: je .LBB6_6 +; X64-RETPOLINE-NEXT: je .LBB6_5 ; X64-RETPOLINE-NEXT: # %bb.2: # %entry ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx ; X64-RETPOLINE-NEXT: cmpq $4, %rdx ; X64-RETPOLINE-NEXT: jne .LBB6_3 ; X64-RETPOLINE-NEXT: .Ltmp0: # Block address taken -; X64-RETPOLINE-NEXT: 
# %bb.7: # %bb3 +; X64-RETPOLINE-NEXT: # %bb.6: # %bb3 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $42, %eax -; X64-RETPOLINE-NEXT: jmp .LBB6_4 +; X64-RETPOLINE-NEXT: orq %rcx, %rsp +; X64-RETPOLINE-NEXT: retq ; X64-RETPOLINE-NEXT: .Ltmp1: # Block address taken -; X64-RETPOLINE-NEXT: .LBB6_5: # %bb1 +; X64-RETPOLINE-NEXT: .LBB6_4: # %bb1 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $7, %eax -; X64-RETPOLINE-NEXT: jmp .LBB6_4 +; X64-RETPOLINE-NEXT: orq %rcx, %rsp +; X64-RETPOLINE-NEXT: retq ; X64-RETPOLINE-NEXT: .Ltmp2: # Block address taken -; X64-RETPOLINE-NEXT: .LBB6_6: # %bb2 +; X64-RETPOLINE-NEXT: .LBB6_5: # %bb2 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $13, %eax -; X64-RETPOLINE-NEXT: jmp .LBB6_4 +; X64-RETPOLINE-NEXT: orq %rcx, %rsp +; X64-RETPOLINE-NEXT: retq ; X64-RETPOLINE-NEXT: .Ltmp3: # Block address taken ; X64-RETPOLINE-NEXT: .LBB6_3: # %bb0 ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx -; X64-RETPOLINE-NEXT: movl $2, %eax -; X64-RETPOLINE-NEXT: .LBB6_4: # %bb0 ; X64-RETPOLINE-NEXT: shlq $47, %rcx +; X64-RETPOLINE-NEXT: movl $2, %eax ; X64-RETPOLINE-NEXT: orq %rcx, %rsp ; X64-RETPOLINE-NEXT: retq entry: @@ -471,9 +496,9 @@ ; X64-LABEL: test_switch_jumptable: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %rcx -; X64-NEXT: movq $-1, %rax ; X64-NEXT: sarq $63, %rcx ; X64-NEXT: cmpl $3, %edi +; X64-NEXT: movq $-1, %rax ; X64-NEXT: ja .LBB6_2 ; X64-NEXT: # %bb.1: # %entry ; X64-NEXT: cmovaq %rax, %rcx @@ -481,125 +506,146 @@ ; X64-NEXT: movq .LJTI6_0(,%rdx,8), %rdx ; X64-NEXT: orq %rcx, %rdx ; X64-NEXT: jmpq *%rdx -; X64-NEXT: .LBB6_4: # %bb1 -; X64-NEXT: cmpq $.LBB6_4, %rdx +; X64-NEXT: .LBB6_3: # %bb1 +; X64-NEXT: cmpq $.LBB6_3, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $7, %eax -; X64-NEXT: jmp .LBB6_3 +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq ; X64-NEXT: .LBB6_2: # %bb0 ; X64-NEXT: cmovbeq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $2, %eax -; X64-NEXT: jmp .LBB6_3 -; X64-NEXT: .LBB6_5: # %bb2 -; X64-NEXT: cmpq $.LBB6_5, %rdx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq +; X64-NEXT: .LBB6_4: # %bb2 +; X64-NEXT: cmpq $.LBB6_4, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $13, %eax -; X64-NEXT: jmp .LBB6_3 -; X64-NEXT: .LBB6_6: # %bb3 -; X64-NEXT: cmpq $.LBB6_6, %rdx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq +; X64-NEXT: .LBB6_5: # %bb3 +; X64-NEXT: cmpq $.LBB6_5, %rdx ; X64-NEXT: cmovneq %rax, %rcx +; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movl $42, %eax -; X64-NEXT: jmp .LBB6_3 -; X64-NEXT: .LBB6_7: # %bb5 -; X64-NEXT: cmpq $.LBB6_7, %rdx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: retq +; X64-NEXT: .LBB6_6: # %bb5 +; X64-NEXT: cmpq $.LBB6_6, %rdx ; X64-NEXT: cmovneq %rax, %rcx -; X64-NEXT: movl $11, %eax -; X64-NEXT: .LBB6_3: # %bb0 ; X64-NEXT: shlq $47, %rcx +; X64-NEXT: movl $11, %eax ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: retq ; ; X64-PIC-LABEL: test_switch_jumptable: ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: movq %rsp, %rcx -; X64-PIC-NEXT: movq $-1, %rax ; X64-PIC-NEXT: sarq $63, %rcx ; X64-PIC-NEXT: cmpl $3, %edi +; X64-PIC-NEXT: movq $-1, %rax ; X64-PIC-NEXT: ja .LBB6_2 ; X64-PIC-NEXT: # %bb.1: # %entry ; X64-PIC-NEXT: cmovaq %rax, %rcx ; X64-PIC-NEXT: movl %edi, %edx -; X64-PIC-NEXT: leaq .LJTI6_0(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: movslq (%rsi,%rdx,4), %rdx ; 
X64-PIC-NEXT: addq %rsi, %rdx ; X64-PIC-NEXT: orq %rcx, %rdx ; X64-PIC-NEXT: jmpq *%rdx -; X64-PIC-NEXT: .LBB6_4: # %bb1 -; X64-PIC-NEXT: leaq .LBB6_4(%rip), %rsi +; X64-PIC-NEXT: .LBB6_3: # %bb1 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $7, %eax -; X64-PIC-NEXT: jmp .LBB6_3 +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq ; X64-PIC-NEXT: .LBB6_2: # %bb0 ; X64-PIC-NEXT: cmovbeq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $2, %eax -; X64-PIC-NEXT: jmp .LBB6_3 -; X64-PIC-NEXT: .LBB6_5: # %bb2 -; X64-PIC-NEXT: leaq .LBB6_5(%rip), %rsi +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq +; X64-PIC-NEXT: .LBB6_4: # %bb2 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $13, %eax -; X64-PIC-NEXT: jmp .LBB6_3 -; X64-PIC-NEXT: .LBB6_6: # %bb3 -; X64-PIC-NEXT: leaq .LBB6_6(%rip), %rsi +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq +; X64-PIC-NEXT: .LBB6_5: # %bb3 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx +; X64-PIC-NEXT: shlq $47, %rcx ; X64-PIC-NEXT: movl $42, %eax -; X64-PIC-NEXT: jmp .LBB6_3 -; X64-PIC-NEXT: .LBB6_7: # %bb5 -; X64-PIC-NEXT: leaq .LBB6_7(%rip), %rsi +; X64-PIC-NEXT: orq %rcx, %rsp +; X64-PIC-NEXT: retq +; X64-PIC-NEXT: .LBB6_6: # %bb5 +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: cmpq %rsi, %rdx ; X64-PIC-NEXT: cmovneq %rax, %rcx -; X64-PIC-NEXT: movl $11, %eax -; X64-PIC-NEXT: .LBB6_3: # %bb0 ; X64-PIC-NEXT: shlq $47, %rcx +; X64-PIC-NEXT: movl $11, %eax ; X64-PIC-NEXT: orq %rcx, %rsp ; X64-PIC-NEXT: retq ; ; X64-RETPOLINE-LABEL: test_switch_jumptable: ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: movq %rsp, %rcx -; X64-RETPOLINE-NEXT: movq $-1, %rax ; X64-RETPOLINE-NEXT: sarq $63, %rcx ; X64-RETPOLINE-NEXT: cmpl $1, %edi +; X64-RETPOLINE-NEXT: movq $-1, %rax ; X64-RETPOLINE-NEXT: jg .LBB7_4 ; X64-RETPOLINE-NEXT: # %bb.1: # %entry ; X64-RETPOLINE-NEXT: cmovgq %rax, %rcx ; X64-RETPOLINE-NEXT: testl %edi, %edi -; X64-RETPOLINE-NEXT: je .LBB7_8 +; X64-RETPOLINE-NEXT: je .LBB7_7 ; X64-RETPOLINE-NEXT: # %bb.2: # %entry ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx ; X64-RETPOLINE-NEXT: cmpl $1, %edi ; X64-RETPOLINE-NEXT: jne .LBB7_6 ; X64-RETPOLINE-NEXT: # %bb.3: # %bb2 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $13, %eax -; X64-RETPOLINE-NEXT: jmp .LBB7_7 +; X64-RETPOLINE-NEXT: orq %rcx, %rsp +; X64-RETPOLINE-NEXT: retq ; X64-RETPOLINE-NEXT: .LBB7_4: # %entry ; X64-RETPOLINE-NEXT: cmovleq %rax, %rcx ; X64-RETPOLINE-NEXT: cmpl $2, %edi -; X64-RETPOLINE-NEXT: je .LBB7_9 +; X64-RETPOLINE-NEXT: je .LBB7_8 ; X64-RETPOLINE-NEXT: # %bb.5: # %entry ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx ; X64-RETPOLINE-NEXT: cmpl $3, %edi ; X64-RETPOLINE-NEXT: jne .LBB7_6 -; X64-RETPOLINE-NEXT: # %bb.10: # %bb5 +; X64-RETPOLINE-NEXT: # %bb.9: # %bb5 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $11, %eax -; X64-RETPOLINE-NEXT: jmp .LBB7_7 +; X64-RETPOLINE-NEXT: orq %rcx, %rsp +; X64-RETPOLINE-NEXT: retq ; X64-RETPOLINE-NEXT: .LBB7_6: ; X64-RETPOLINE-NEXT: cmoveq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $2, %eax -; X64-RETPOLINE-NEXT: jmp .LBB7_7 -; X64-RETPOLINE-NEXT: .LBB7_8: # %bb1 +; X64-RETPOLINE-NEXT: orq 
%rcx, %rsp +; X64-RETPOLINE-NEXT: retq +; X64-RETPOLINE-NEXT: .LBB7_7: # %bb1 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx +; X64-RETPOLINE-NEXT: shlq $47, %rcx ; X64-RETPOLINE-NEXT: movl $7, %eax -; X64-RETPOLINE-NEXT: jmp .LBB7_7 -; X64-RETPOLINE-NEXT: .LBB7_9: # %bb3 +; X64-RETPOLINE-NEXT: orq %rcx, %rsp +; X64-RETPOLINE-NEXT: retq +; X64-RETPOLINE-NEXT: .LBB7_8: # %bb3 ; X64-RETPOLINE-NEXT: cmovneq %rax, %rcx -; X64-RETPOLINE-NEXT: movl $42, %eax -; X64-RETPOLINE-NEXT: .LBB7_7: # %bb0 ; X64-RETPOLINE-NEXT: shlq $47, %rcx +; X64-RETPOLINE-NEXT: movl $42, %eax ; X64-RETPOLINE-NEXT: orq %rcx, %rsp ; X64-RETPOLINE-NEXT: retq entry: @@ -634,16 +680,16 @@ ; X64-LABEL: test_switch_jumptable_fallthrough: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %r9 -; X64-NEXT: movq $-1, %r10 ; X64-NEXT: sarq $63, %r9 ; X64-NEXT: cmpl $3, %edi +; X64-NEXT: movq $-1, %r10 ; X64-NEXT: ja .LBB7_2 ; X64-NEXT: # %bb.1: # %entry ; X64-NEXT: cmovaq %r10, %r9 -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: movl %edi, %esi -; X64-NEXT: movq .LJTI7_0(,%rsi,8), %rsi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movq .LJTI7_0(,%rax,8), %rsi ; X64-NEXT: orq %r9, %rsi +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: jmpq *%rsi ; X64-NEXT: .LBB7_2: # %bb0 ; X64-NEXT: cmovbeq %r10, %r9 @@ -678,47 +724,47 @@ ; X64-PIC-LABEL: test_switch_jumptable_fallthrough: ; X64-PIC: # %bb.0: # %entry ; X64-PIC-NEXT: movq %rsp, %r9 -; X64-PIC-NEXT: movq $-1, %r10 ; X64-PIC-NEXT: sarq $63, %r9 ; X64-PIC-NEXT: cmpl $3, %edi +; X64-PIC-NEXT: movq $-1, %r10 ; X64-PIC-NEXT: ja .LBB7_2 ; X64-PIC-NEXT: # %bb.1: # %entry ; X64-PIC-NEXT: cmovaq %r10, %r9 -; X64-PIC-NEXT: xorl %eax, %eax -; X64-PIC-NEXT: movl %edi, %esi -; X64-PIC-NEXT: leaq .LJTI7_0(%rip), %rdi -; X64-PIC-NEXT: movslq (%rdi,%rsi,4), %rsi +; X64-PIC-NEXT: movl %edi, %eax +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdi +; X64-PIC-NEXT: movslq (%rdi,%rax,4), %rsi ; X64-PIC-NEXT: addq %rdi, %rsi ; X64-PIC-NEXT: orq %r9, %rsi +; X64-PIC-NEXT: xorl %eax, %eax ; X64-PIC-NEXT: jmpq *%rsi ; X64-PIC-NEXT: .LBB7_2: # %bb0 ; X64-PIC-NEXT: cmovbeq %r10, %r9 ; X64-PIC-NEXT: movl (%rsi), %eax ; X64-PIC-NEXT: orl %r9d, %eax -; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: .LBB7_3: # %bb1 -; X64-PIC-NEXT: leaq .LBB7_3(%rip), %rdi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdi ; X64-PIC-NEXT: cmpq %rdi, %rsi ; X64-PIC-NEXT: cmovneq %r10, %r9 ; X64-PIC-NEXT: addl (%rdx), %eax ; X64-PIC-NEXT: orl %r9d, %eax -; X64-PIC-NEXT: leaq .LBB7_4(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: .LBB7_4: # %bb2 -; X64-PIC-NEXT: leaq .LBB7_4(%rip), %rdx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rdx ; X64-PIC-NEXT: cmpq %rdx, %rsi ; X64-PIC-NEXT: cmovneq %r10, %r9 ; X64-PIC-NEXT: addl (%rcx), %eax ; X64-PIC-NEXT: orl %r9d, %eax -; X64-PIC-NEXT: leaq .LBB7_5(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: .LBB7_5: # %bb3 -; X64-PIC-NEXT: leaq .LBB7_5(%rip), %rcx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; X64-PIC-NEXT: cmpq %rcx, %rsi ; X64-PIC-NEXT: cmovneq %r10, %r9 ; X64-PIC-NEXT: addl (%r8), %eax ; X64-PIC-NEXT: orl %r9d, %eax -; X64-PIC-NEXT: leaq .LBB7_6(%rip), %rsi +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rsi ; X64-PIC-NEXT: .LBB7_6: # %bb4 -; X64-PIC-NEXT: leaq .LBB7_6(%rip), %rcx +; X64-PIC-NEXT: leaq {{.*}}(%rip), %rcx ; X64-PIC-NEXT: cmpq %rcx, %rsi ; X64-PIC-NEXT: cmovneq %r10, %r9 ; X64-PIC-NEXT: shlq $47, %r9 @@ -728,10 +774,10 @@ ; X64-RETPOLINE-LABEL: test_switch_jumptable_fallthrough: ; X64-RETPOLINE: # %bb.0: # %entry ; X64-RETPOLINE-NEXT: movq 
%rsp, %r9 -; X64-RETPOLINE-NEXT: movq $-1, %r10 ; X64-RETPOLINE-NEXT: sarq $63, %r9 ; X64-RETPOLINE-NEXT: xorl %eax, %eax ; X64-RETPOLINE-NEXT: cmpl $1, %edi +; X64-RETPOLINE-NEXT: movq $-1, %r10 ; X64-RETPOLINE-NEXT: jg .LBB8_5 ; X64-RETPOLINE-NEXT: # %bb.1: # %entry ; X64-RETPOLINE-NEXT: cmovgq %r10, %r9 Index: llvm/test/CodeGen/X86/speculative-load-hardening.ll =================================================================== --- llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -12,11 +12,11 @@ ; X64-LABEL: test_trivial_entry_load: ; X64: # %bb.0: # %entry ; X64-NEXT: movq %rsp, %rcx -; X64-NEXT: movq $-1, %rax ; X64-NEXT: sarq $63, %rcx ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: orl %ecx, %eax ; X64-NEXT: shlq $47, %rcx +; X64-NEXT: movq $-1, %rdx ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: retq ; @@ -42,9 +42,9 @@ ; X64-NEXT: .cfi_offset %r14, -24 ; X64-NEXT: .cfi_offset %r15, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: jne .LBB1_1 ; X64-NEXT: # %bb.2: # %then1 ; X64-NEXT: cmovneq %rbx, %rax @@ -202,9 +202,9 @@ ; X64-NEXT: pushq %r12 ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: je .LBB2_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: cmoveq %r15, %rax @@ -312,9 +312,9 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %rbp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi +; X64-NEXT: movq $-1, %rbp ; X64-NEXT: je .LBB3_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: cmoveq %rbp, %rax @@ -515,9 +515,9 @@ ; X64-NEXT: .cfi_offset %r15, -24 ; X64-NEXT: .cfi_offset %rbp, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpl $41, %edi +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: jg .LBB4_1 ; X64-NEXT: # %bb.2: # %thrower ; X64-NEXT: movq %rdx, %r14 @@ -538,10 +538,10 @@ ; X64-NEXT: cmovneq %r15, %rcx ; X64-NEXT: movl %ebp, (%rax) ; X64-NEXT: .Ltmp0: -; X64-NEXT: xorl %esi, %esi -; X64-NEXT: xorl %edx, %edx ; X64-NEXT: shlq $47, %rcx ; X64-NEXT: movq %rax, %rdi +; X64-NEXT: xorl %esi, %esi +; X64-NEXT: xorl %edx, %edx ; X64-NEXT: orq %rcx, %rsp ; X64-NEXT: callq __cxa_throw ; X64-NEXT: .Lslh_ret_addr5: @@ -612,9 +612,9 @@ ; X64-LFENCE-NEXT: callq __cxa_allocate_exception ; X64-LFENCE-NEXT: movl %ebp, (%rax) ; X64-LFENCE-NEXT: .Ltmp0: +; X64-LFENCE-NEXT: movq %rax, %rdi ; X64-LFENCE-NEXT: xorl %esi, %esi ; X64-LFENCE-NEXT: xorl %edx, %edx -; X64-LFENCE-NEXT: movq %rax, %rdi ; X64-LFENCE-NEXT: callq __cxa_throw ; X64-LFENCE-NEXT: .Ltmp1: ; X64-LFENCE-NEXT: .LBB4_2: # %exit @@ -681,10 +681,10 @@ ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rsi, %rbx ; X64-NEXT: movq %rdi, %r12 -; X64-NEXT: movq $-1, %r13 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: orq %rax, %r12 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movq $-1, %r13 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_float @@ -871,10 +871,10 @@ ; X64-NEXT: movq %rcx, %r12 ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rsi, %rbx -; X64-NEXT: movq $-1, %rbp ; X64-NEXT: sarq $63, %rax ; X64-NEXT: orq %rax, %rdi ; X64-NEXT: movaps (%rdi), %xmm0 +; X64-NEXT: movq $-1, %rbp ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink_v4f32 @@ -1005,12 +1005,12 @@ ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq %rsi, %r14 
; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: movq $-1, %r15 ; X64-NEXT: sarq $63, %rax ; X64-NEXT: movl (%rdi), %edi ; X64-NEXT: incl %edi ; X64-NEXT: imull %edx, %edi ; X64-NEXT: orl %eax, %edi +; X64-NEXT: movq $-1, %r15 ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp ; X64-NEXT: callq sink Index: llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll =================================================================== --- llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -75,9 +75,9 @@ ; CHECK: # %shared_preheader ; CHECK: # %shared_loop_header ; CHECK: # %inner_loop_body -; CHECK: # %outer_loop_latch ; CHECK: # %merge_predecessor_split ; CHECK: # %outer_loop_latch +; CHECK: # %outer_loop_latch ; CHECK: # %cleanup define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i32 %wwprva, i32 %e_lfanew, i8* readonly %wwp, i32 %wwpsz, i16 zeroext %sects) local_unnamed_addr #0 { entry: Index: llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -112,8 +112,8 @@ ; X86-NEXT: addl %ebx, %esi ; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload ; X86-NEXT: adcl %eax, %ecx -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: mull %edx ; X86-NEXT: movl %edx, %ebp ; X86-NEXT: movl %eax, %ebx Index: llvm/test/CodeGen/X86/vec_fneg.ll =================================================================== --- llvm/test/CodeGen/X86/vec_fneg.ll +++ llvm/test/CodeGen/X86/vec_fneg.ll @@ -58,12 +58,12 @@ ; X32-SSE1-NEXT: movl %esp, %ebp ; X32-SSE1-NEXT: andl $-16, %esp ; X32-SSE1-NEXT: subl $32, %esp -; X32-SSE1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X32-SSE1-NEXT: movl 12(%ebp), %ecx -; X32-SSE1-NEXT: xorl %eax, %ecx -; X32-SSE1-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X32-SSE1-NEXT: xorl 8(%ebp), %eax -; X32-SSE1-NEXT: movl %eax, (%esp) +; X32-SSE1-NEXT: movl 12(%ebp), %eax +; X32-SSE1-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 +; X32-SSE1-NEXT: xorl %ecx, %eax +; X32-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-SSE1-NEXT: xorl 8(%ebp), %ecx +; X32-SSE1-NEXT: movl %ecx, (%esp) ; X32-SSE1-NEXT: movaps (%esp), %xmm0 ; X32-SSE1-NEXT: movl %ebp, %esp ; X32-SSE1-NEXT: popl %ebp @@ -71,12 +71,12 @@ ; ; X32-SSE2-LABEL: fneg_bitcast: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-SSE2-NEXT: xorl %eax, %ecx -; X32-SSE2-NEXT: movd %ecx, %xmm1 -; X32-SSE2-NEXT: xorl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movd %eax, %xmm0 +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 +; X32-SSE2-NEXT: xorl %ecx, %eax +; X32-SSE2-NEXT: movd %eax, %xmm1 +; X32-SSE2-NEXT: xorl {{[0-9]+}}(%esp), %ecx +; X32-SSE2-NEXT: movd %ecx, %xmm0 ; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X32-SSE2-NEXT: retl ; Index: llvm/test/CodeGen/X86/vec_setcc-2.ll =================================================================== --- llvm/test/CodeGen/X86/vec_setcc-2.ll +++ llvm/test/CodeGen/X86/vec_setcc-2.ll @@ -10,9 +10,9 @@ ; SSE2-NEXT: testl %edx, %edx ; SSE2-NEXT: je LBB0_3 ; SSE2-NEXT: ## %bb.1: ## %for.body.preheader -; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25] ; 
SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: LBB0_2: ## %for.body ; SSE2-NEXT: ## =>This Inner Loop Header: Depth=1 @@ -31,9 +31,9 @@ ; SSE41-NEXT: testl %edx, %edx ; SSE41-NEXT: je LBB0_3 ; SSE41-NEXT: ## %bb.1: ## %for.body.preheader -; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [26,26,26,26,26,26,26,26] ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: .p2align 4, 0x90 ; SSE41-NEXT: LBB0_2: ## %for.body ; SSE41-NEXT: ## =>This Inner Loop Header: Depth=1 @@ -79,9 +79,9 @@ ; SSE2-NEXT: testl %edx, %edx ; SSE2-NEXT: je LBB1_3 ; SSE2-NEXT: ## %bb.1: ## %for.body.preheader -; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768] ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32794,32794,32794,32794,32794,32794,32794] +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: LBB1_2: ## %for.body ; SSE2-NEXT: ## =>This Inner Loop Header: Depth=1 @@ -101,9 +101,9 @@ ; SSE41-NEXT: testl %edx, %edx ; SSE41-NEXT: je LBB1_3 ; SSE41-NEXT: ## %bb.1: ## %for.body.preheader -; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,26,26,26,26,26,26,26] ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: .p2align 4, 0x90 ; SSE41-NEXT: LBB1_2: ## %for.body ; SSE41-NEXT: ## =>This Inner Loop Header: Depth=1 Index: llvm/test/CodeGen/X86/vector-idiv-v2i32.ll =================================================================== --- llvm/test/CodeGen/X86/vector-idiv-v2i32.ll +++ llvm/test/CodeGen/X86/vector-idiv-v2i32.ll @@ -839,8 +839,8 @@ ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %esi ; X86-NEXT: movl %eax, %esi -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %ebx ; X86-NEXT: movd %eax, %xmm0 ; X86-NEXT: movd %esi, %xmm1 @@ -885,8 +885,8 @@ ; X86_WIDEN-NEXT: xorl %edx, %edx ; X86_WIDEN-NEXT: divl (%ebx) ; X86_WIDEN-NEXT: movl %eax, %esi -; X86_WIDEN-NEXT: xorl %edx, %edx ; X86_WIDEN-NEXT: movl %ecx, %eax +; X86_WIDEN-NEXT: xorl %edx, %edx ; X86_WIDEN-NEXT: divl 4(%ebx) ; X86_WIDEN-NEXT: movl %eax, 4(%edi) ; X86_WIDEN-NEXT: movl %esi, (%edi) @@ -943,8 +943,8 @@ ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %esi ; X86-NEXT: movl %edx, %esi -; X86-NEXT: xorl %edx, %edx ; X86-NEXT: movl %ecx, %eax +; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %ebx ; X86-NEXT: movd %edx, %xmm0 ; X86-NEXT: movd %esi, %xmm1 @@ -989,8 +989,8 @@ ; X86_WIDEN-NEXT: xorl %edx, %edx ; X86_WIDEN-NEXT: divl (%ebx) ; X86_WIDEN-NEXT: movl %edx, %esi -; X86_WIDEN-NEXT: xorl %edx, %edx ; X86_WIDEN-NEXT: movl %ecx, %eax +; X86_WIDEN-NEXT: xorl %edx, %edx ; X86_WIDEN-NEXT: divl 4(%ebx) ; X86_WIDEN-NEXT: movl %edx, 4(%edi) ; X86_WIDEN-NEXT: movl %esi, (%edi) Index: llvm/test/CodeGen/X86/vector-rem.ll =================================================================== --- llvm/test/CodeGen/X86/vector-rem.ll +++ llvm/test/CodeGen/X86/vector-rem.ll @@ -81,30 +81,30 @@ ; CHECK-LABEL: qux: ; CHECK: # %bb.0: ; CHECK-NEXT: subq $72, %rsp -; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; CHECK-NEXT: callq fmodf ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; 
CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; CHECK-NEXT: callq fmodf ; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: callq fmodf -; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; CHECK-NEXT: callq fmodf -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] Index: llvm/test/CodeGen/X86/win32-eh.ll =================================================================== --- llvm/test/CodeGen/X86/win32-eh.ll +++ llvm/test/CodeGen/X86/win32-eh.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck %s declare void @may_throw_or_crash() @@ -9,6 +10,10 @@ declare i32 @llvm.eh.typeid.for(i8*) define internal i32 @catchall_filt() { +; CHECK-LABEL: catchall_filt: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: retl ret i32 1 } @@ -77,8 +82,8 @@ ; CHECK-NEXT: movl %ebp, %eax ; CHECK-NEXT: movl %esp, -36(%ebp) ; CHECK-NEXT: movl $-2, -16(%ebp) -; CHECK-NEXT: movl $L__ehtable$use_except_handler4, %[[lsda:[^ ,]*]] ; CHECK-NEXT: movl ___security_cookie, %[[seccookie:[^ ,]*]] +; CHECK-NEXT: movl $L__ehtable$use_except_handler4, %[[lsda:[^ ,]*]] ; CHECK-NEXT: xorl %[[seccookie]], %[[lsda]] ; CHECK-NEXT: movl %[[lsda]], -20(%ebp) ; CHECK-NEXT: xorl %[[seccookie]], %[[tmp1:[^ ,]*]] @@ -134,8 +139,8 @@ ; CHECK-NEXT: movl %ebp, %[[ehguard:[^ ,]*]] ; CHECK-NEXT: movl %esp, -36(%ebp) ; CHECK-NEXT: movl $-2, -16(%ebp) -; CHECK-NEXT: movl $L__ehtable$use_except_handler4_ssp, %[[lsda:[^ ,]*]] ; CHECK-NEXT: movl ___security_cookie, %[[seccookie:[^ ,]*]] +; CHECK-NEXT: movl $L__ehtable$use_except_handler4_ssp, %[[lsda:[^ ,]*]] ; CHECK-NEXT: xorl %[[seccookie]], %[[lsda]] ; CHECK-NEXT: movl %[[lsda]], -20(%ebp) ; CHECK-NEXT: xorl %[[seccookie]], %[[ehguard]] @@ -148,7 +153,7 @@ ; CHECK-NEXT: movl $0, -16(%ebp) ; CHECK-NEXT: calll _may_throw_or_crash ; CHECK: movl -28(%ebp), %[[next:[^ ,]*]] -; CHECK-NEXT: movl %[[next]], %fs:0 +; CHECK-NEXT: movl %[[next]], %fs:0 ; CHECK: retl ; CHECK-NEXT: [[catch:[^ ,]*]]: # %catch{{$}} @@ -158,7 +163,7 @@ ; CHECK-LABEL: L__ehtable$use_except_handler4_ssp: ; CHECK-NEXT: .long -2 ; 
CHECK-NEXT: .long 0 -; CHECK-NEXT: .long -40 +; CHECK-NEXT: .long -40 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long -2 ; CHECK-NEXT: .long _catchall_filt Index: llvm/test/CodeGen/X86/x86-cmov-converter.ll =================================================================== --- llvm/test/CodeGen/X86/x86-cmov-converter.ll +++ llvm/test/CodeGen/X86/x86-cmov-converter.ll @@ -103,7 +103,7 @@ ; CHECK-LABEL: CmovInHotPath ; CHECK-NOT: cmov -; CHECK: jg +; CHECK: jne define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 { entry: @@ -296,9 +296,9 @@ ; CHECK-LABEL: Transform ; CHECK-NOT: cmov ; CHECK: divl [[a:%[0-9a-z]*]] -; CHECK: movl $11, [[s1:%[0-9a-z]*]] -; CHECK: movl [[a]], [[s2:%[0-9a-z]*]] -; CHECK: cmpl [[a]], %edx +; CHECK-DAG: movl $11, [[s1:%[0-9a-z]*]] +; CHECK-DAG: movl [[a]], [[s2:%[0-9a-z]*]] +; CHECK-DAG: cmpl [[a]], %edx ; CHECK: ja [[SinkBB:.*]] ; CHECK: [[FalseBB:.*]]: ; CHECK: movl $22, [[s1]] Index: llvm/test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -427,41 +427,38 @@ ; Check that we handle calls to variadic functions correctly. ; CHECK-LABEL: callVariadicFunc: ; +; ENABLE: movl %esi, %eax ; ENABLE: testl %edi, %edi ; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; CHECK: pushq ; +; DISABLE: movl %esi, %eax ; DISABLE: testl %edi, %edi ; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]] ; ; Setup of the varags. -; CHECK: movl %esi, (%rsp) -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: %esi, %edi -; CHECK-NEXT: %esi, %edx -; CHECK-NEXT: %esi, %ecx -; CHECK-NEXT: %esi, %r8d -; CHECK-NEXT: %esi, %r9d +; CHECK: movl %eax, (%rsp) +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %eax, %r8d +; CHECK-NEXT: movl %eax, %r9d +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: callq _someVariadicFunc -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: shll $3, %esi +; CHECK-NEXT: shll $3, %eax ; ; ENABLE-NEXT: addq $8, %rsp -; ENABLE-NEXT: movl %esi, %eax ; ENABLE-NEXT: retq ; -; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]] -; + ; CHECK: [[ELSE_LABEL]]: ## %if.else ; Shift second argument by one and store into returned register. -; CHECK: addl %esi, %esi -; -; DISABLE: [[IFEND_LABEL]]: ## %if.end +; CHECK: addl %eax, %eax ; ; Epilogue code. 
-; CHECK-NEXT: movl %esi, %eax ; DISABLE-NEXT: popq ; CHECK-NEXT: retq define i32 @callVariadicFunc(i32 %cond, i32 %N) { Index: llvm/test/CodeGen/X86/xmulo.ll =================================================================== --- llvm/test/CodeGen/X86/xmulo.ll +++ llvm/test/CodeGen/X86/xmulo.ll @@ -34,24 +34,24 @@ define {i64, i1} @t2() nounwind { ; SDAG-LABEL: t2: ; SDAG: ## %bb.0: -; SDAG-NEXT: xorl %ecx, %ecx ; SDAG-NEXT: movl $9, %eax +; SDAG-NEXT: xorl %ecx, %ecx ; SDAG-NEXT: mulq %rcx ; SDAG-NEXT: seto %dl ; SDAG-NEXT: retq ; ; FAST-LABEL: t2: ; FAST: ## %bb.0: -; FAST-NEXT: xorl %ecx, %ecx ; FAST-NEXT: movl $9, %eax +; FAST-NEXT: xorl %ecx, %ecx ; FAST-NEXT: mulq %rcx ; FAST-NEXT: seto %dl ; FAST-NEXT: retq ; ; KNL-LABEL: t2: ; KNL: ## %bb.0: -; KNL-NEXT: xorl %ecx, %ecx ; KNL-NEXT: movl $9, %eax +; KNL-NEXT: xorl %ecx, %ecx ; KNL-NEXT: mulq %rcx ; KNL-NEXT: seto %dl ; KNL-NEXT: retq @@ -62,24 +62,24 @@ define {i64, i1} @t3() nounwind { ; SDAG-LABEL: t3: ; SDAG: ## %bb.0: -; SDAG-NEXT: movq $-1, %rcx ; SDAG-NEXT: movl $9, %eax +; SDAG-NEXT: movq $-1, %rcx ; SDAG-NEXT: mulq %rcx ; SDAG-NEXT: seto %dl ; SDAG-NEXT: retq ; ; FAST-LABEL: t3: ; FAST: ## %bb.0: -; FAST-NEXT: movq $-1, %rcx ; FAST-NEXT: movl $9, %eax +; FAST-NEXT: movq $-1, %rcx ; FAST-NEXT: mulq %rcx ; FAST-NEXT: seto %dl ; FAST-NEXT: retq ; ; KNL-LABEL: t3: ; KNL: ## %bb.0: -; KNL-NEXT: movq $-1, %rcx ; KNL-NEXT: movl $9, %eax +; KNL-NEXT: movq $-1, %rcx ; KNL-NEXT: mulq %rcx ; KNL-NEXT: seto %dl ; KNL-NEXT: retq Index: llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll =================================================================== --- llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll +++ llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll @@ -25,8 +25,8 @@ ; CHECK-LABEL: f: # @f ; CHECK: .LBB0_2: # %while.body -; CHECK: movl $32, %ecx ; CHECK: testl {{.*}} +; CHECK: movl $32, %ecx ; CHECK: jne .LBB0_4 ; CHECK: # %bb.3: # %if.then ; CHECK: callq if_then Index: llvm/test/DebugInfo/X86/inlined-indirect-value.ll =================================================================== --- llvm/test/DebugInfo/X86/inlined-indirect-value.ll +++ llvm/test/DebugInfo/X86/inlined-indirect-value.ll @@ -24,10 +24,11 @@ @y = common global i32 0, align 4, !dbg !6 define i32 @main() !dbg !12 { -; CHECK: .loc 1 {{[89]}} +; CHECK: .loc 1 9 ; CHECK-NOT: .loc ; CHECK: movl $1 + entry: %0 = load volatile i32, i32* @x, align 4, !dbg !15, !tbaa !19 %mul.i = mul nsw i32 %0, 3, !dbg !23 Index: llvm/test/DebugInfo/X86/live-debug-values.ll =================================================================== --- llvm/test/DebugInfo/X86/live-debug-values.ll +++ llvm/test/DebugInfo/X86/live-debug-values.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: #DEBUG_VALUE: main:n <- $ebx ; Other register values have been clobbered. 
; CHECK-NOT: #DEBUG_VALUE: -; CHECK: movl %esi, m(%rip) +; CHECK: movl %e{{..}}, m(%rip) ; ModuleID = 'LiveDebugValues.c' source_filename = "test/DebugInfo/X86/live-debug-values.ll" Index: llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll =================================================================== --- llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -347,31 +347,31 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: movl $3, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl $3, %esi ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB3_1: # %for.body ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movzbl -3(%esi,%eax), %edi -; X32-NEXT: movzbl -3(%edx,%eax), %ebx +; X32-NEXT: movzbl -3(%edx,%esi), %edi +; X32-NEXT: movzbl -3(%ecx,%esi), %ebx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, -3(%ecx,%eax) -; X32-NEXT: movzbl -2(%esi,%eax), %edi -; X32-NEXT: movzbl -2(%edx,%eax), %ebx +; X32-NEXT: movb %bl, -3(%eax,%esi) +; X32-NEXT: movzbl -2(%edx,%esi), %edi +; X32-NEXT: movzbl -2(%ecx,%esi), %ebx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, -2(%ecx,%eax) -; X32-NEXT: movzbl -1(%esi,%eax), %edi -; X32-NEXT: movzbl -1(%edx,%eax), %ebx +; X32-NEXT: movb %bl, -2(%eax,%esi) +; X32-NEXT: movzbl -1(%edx,%esi), %edi +; X32-NEXT: movzbl -1(%ecx,%esi), %ebx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, -1(%ecx,%eax) -; X32-NEXT: movzbl (%esi,%eax), %edi -; X32-NEXT: movzbl (%edx,%eax), %ebx +; X32-NEXT: movb %bl, -1(%eax,%esi) +; X32-NEXT: movzbl (%edx,%esi), %edi +; X32-NEXT: movzbl (%ecx,%esi), %ebx ; X32-NEXT: addl %edi, %ebx -; X32-NEXT: movb %bl, (%ecx,%eax) -; X32-NEXT: addl $4, %eax -; X32-NEXT: cmpl $403, %eax # imm = 0x193 +; X32-NEXT: movb %bl, (%eax,%esi) +; X32-NEXT: addl $4, %esi +; X32-NEXT: cmpl $403, %esi # imm = 0x193 ; X32-NEXT: jne .LBB3_1 ; X32-NEXT: # %bb.2: # %for.end ; X32-NEXT: popl %esi @@ -460,21 +460,21 @@ ; X32-LABEL: multioper: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %esi -; X32-NEXT: xorl %eax, %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NEXT: xorl %edx, %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB4_1: # %for.body ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl %eax, (%edx,%eax,4) -; X32-NEXT: leal 1(%eax), %esi -; X32-NEXT: movl %esi, 4(%edx,%eax,4) -; X32-NEXT: leal 2(%eax), %esi -; X32-NEXT: movl %esi, 8(%edx,%eax,4) -; X32-NEXT: leal 3(%eax), %esi -; X32-NEXT: movl %esi, 12(%edx,%eax,4) -; X32-NEXT: addl $4, %eax -; X32-NEXT: cmpl %ecx, %eax +; X32-NEXT: movl %edx, (%ecx,%edx,4) +; X32-NEXT: leal 1(%edx), %esi +; X32-NEXT: movl %esi, 4(%ecx,%edx,4) +; X32-NEXT: leal 2(%edx), %esi +; X32-NEXT: movl %esi, 8(%ecx,%edx,4) +; X32-NEXT: leal 3(%edx), %esi +; X32-NEXT: movl %esi, 12(%ecx,%edx,4) +; X32-NEXT: addl $4, %edx +; X32-NEXT: cmpl %eax, %edx ; X32-NEXT: jl .LBB4_1 ; X32-NEXT: # %bb.2: # %exit ; X32-NEXT: popl %esi