diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -440,17 +440,29 @@
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
-                               std::vector<SUnit*> &LiveRegDefs,
+                               std::vector<SUnit *> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVectorImpl<unsigned> &LRegs,
-                               const TargetRegisterInfo *TRI) {
+                               const TargetRegisterInfo *TRI,
+                               const SDNode *Node = nullptr) {
   bool Added = false;
   for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
-    if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
-      if (RegAdded.insert(*AI).second) {
-        LRegs.push_back(*AI);
-        Added = true;
-      }
+    // Check if Reg is live.
+    if (!LiveRegDefs[*AI])
+      continue;
+
+    // Allow multiple uses of the same def.
+    if (LiveRegDefs[*AI] == SU)
+      continue;
+
+    // Allow multiple uses of a def coming from the same node.
+    if (Node && LiveRegDefs[*AI]->getNode() == Node)
+      continue;
+
+    // Add Reg to the set of interfering live regs.
+    if (RegAdded.insert(*AI).second) {
+      LRegs.push_back(*AI);
+      Added = true;
     }
   }
   return Added;
@@ -502,6 +514,15 @@
       }
       continue;
     }
+
+    if (Node->getOpcode() == ISD::CopyToReg) {
+      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (Reg.isPhysical()) {
+        SDNode *SrcNode = Node->getOperand(2).getNode();
+        CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
+      }
+    }
+
     if (!Node->isMachineOpcode())
       continue;
     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1294,11 +1294,11 @@
 
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
-static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
-                               SUnit **LiveRegDefs,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVectorImpl<unsigned> &LRegs,
-                               const TargetRegisterInfo *TRI) {
+                               const TargetRegisterInfo *TRI,
+                               const SDNode *Node = nullptr) {
   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
 
     // Check if Ref is live.
@@ -1307,6 +1307,10 @@
     // Allow multiple uses of the same def.
     if (LiveRegDefs[*AliasI] == SU)
       continue;
+    // Allow multiple uses of a def coming from the same node.
+    if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+      continue;
+
     // Add Reg to the set of interfering live regs.
     if (RegAdded.insert(*AliasI).second) {
       LRegs.push_back(*AliasI);
@@ -1387,6 +1391,15 @@
       continue;
     }
 
+    if (Node->getOpcode() == ISD::CopyToReg) {
+      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (Reg.isPhysical()) {
+        SDNode *SrcNode = Node->getOperand(2).getNode();
+        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+                           SrcNode);
+      }
+    }
+
     if (!Node->isMachineOpcode())
       continue;
     // If we're in the middle of scheduling a call, don't begin scheduling
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=source < %s | FileCheck -check-prefix=RRLIST %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s | FileCheck -check-prefix=FAST %s
+
+
+define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {
+; RRLIST-LABEL: sccClobber:
+; RRLIST:       ; %bb.0: ; %entry
+; RRLIST-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; RRLIST-NEXT:    v_mov_b32_e32 v2, 0
+; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
+; RRLIST-NEXT:    s_load_dword s16, s[8:9], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[14:15], s[0:1], 0x44
+; RRLIST-NEXT:    s_load_dword s17, s[10:11], 0x0
+; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
+; RRLIST-NEXT:    s_min_i32 s4, s16, 0
+; RRLIST-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; RRLIST-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; RRLIST-NEXT:    s_and_b64 s[0:1], vcc, exec
+; RRLIST-NEXT:    s_cselect_b32 s0, s16, s17
+; RRLIST-NEXT:    s_cmp_eq_u64 s[12:13], s[2:3]
+; RRLIST-NEXT:    s_cselect_b32 s0, s4, s0
+; RRLIST-NEXT:    v_mov_b32_e32 v0, s0
+; RRLIST-NEXT:    global_store_dword v2, v0, s[14:15]
+; RRLIST-NEXT:    s_endpgm
+;
+; FAST-LABEL: sccClobber:
+; FAST:       ; %bb.0: ; %entry
+; FAST-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; FAST-NEXT:    v_mov_b32_e32 v2, 0
+; FAST-NEXT:    s_waitcnt lgkmcnt(0)
+; FAST-NEXT:    s_load_dword s16, s[8:9], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[14:15], s[0:1], 0x44
+; FAST-NEXT:    s_load_dword s17, s[10:11], 0x0
+; FAST-NEXT:    s_waitcnt lgkmcnt(0)
+; FAST-NEXT:    s_min_i32 s4, s16, 0
+; FAST-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; FAST-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; FAST-NEXT:    s_and_b64 s[0:1], vcc, exec
+; FAST-NEXT:    s_cselect_b32 s0, s16, s17
+; FAST-NEXT:    s_cmp_eq_u64 s[12:13], s[2:3]
+; FAST-NEXT:    s_cselect_b32 s0, s4, s0
+; FAST-NEXT:    v_mov_b32_e32 v0, s0
+; FAST-NEXT:    global_store_dword v2, v0, s[14:15]
+; FAST-NEXT:    s_endpgm
+entry:
+  %i = load i64, ptr addrspace(1) %a, align 8
+  %i.1 = load i64, ptr addrspace(1) %b, align 8
+  %i.2 = load i32, ptr addrspace(1) %e, align 4
+  %i.3 = load i32, ptr addrspace(1) %f, align 4
+  %cmp7.1 = icmp eq i64 %i, %i.1
+  %call.1 = tail call noundef i32 @llvm.smin.i32(i32 noundef 0, i32 noundef %i.2)
+  %cmp8.1 = icmp slt i64 %i, %i.1
+  %cond.1 = select i1 %cmp8.1, i32 %i.2, i32 %i.3
+  %cond14.1 = select i1 %cmp7.1, i32 %call.1, i32 %cond.1
+  store i32 %cond14.1, ptr addrspace(1) %pout.coerce, align 4
+  ret void
+}
+
+declare i32 @llvm.smin.i32(i32, i32)