Index: llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp +++ llvm/trunk/lib/Target/X86/X86InsertPrefetch.cpp @@ -76,6 +76,19 @@ return std::error_code(); } +// The prefetch instruction can't take memory operands involving vector +// registers: base/index of the memory operand at Op must be 0, GR64 or GR32. +bool IsMemOpCompatibleWithPrefetch(const MachineInstr &MI, int Op) { + unsigned BaseReg = MI.getOperand(Op + X86::AddrBaseReg).getReg(); + unsigned IndexReg = MI.getOperand(Op + X86::AddrIndexReg).getReg(); + return (BaseReg == 0 || + X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg)) && + (IndexReg == 0 || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)); +} + } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -182,6 +195,11 @@ int Offset = X86II::getMemoryOperandNo(Current->getDesc().TSFlags); if (Offset < 0) continue; + unsigned Bias = X86II::getOperandBias(Current->getDesc()); + int MemOpOffset = Offset + Bias; + // FIXME(mtrofin): ORE message when the recommendation cannot be taken. 
+ if (!IsMemOpCompatibleWithPrefetch(*Current, MemOpOffset)) + continue; Prefetches.clear(); if (!findPrefetchInfo(Samples, *Current, Prefetches)) continue; @@ -195,8 +213,6 @@ MachineInstr *PFetch = MF.CreateMachineInstr(Desc, Current->getDebugLoc(), true); MachineInstrBuilder MIB(MF, PFetch); - unsigned Bias = X86II::getOperandBias(Current->getDesc()); - int MemOpOffset = Offset + Bias; assert(X86::AddrBaseReg == 0 && X86::AddrScaleAmt == 1 && X86::AddrIndexReg == 2 && X86::AddrDisp == 3 && Index: llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo =================================================================== --- llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo +++ llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.afdo @@ -0,0 +1,2 @@ +main:0:0 + 6: 0 __prefetch_nta_0:42 \ No newline at end of file Index: llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll +++ llvm/trunk/test/CodeGen/X86/insert-prefetch-invalid-instr.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s +; ModuleID = 'prefetch.cc' +source_filename = "prefetch.cc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { +entry: + tail call void @llvm.prefetch(i8* inttoptr (i64 291 to i8*), i32 0, i32 0, i32 1), !dbg !9 + tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, i8* null, i32 1, i32 2), !dbg !10 + ret i32 291, !dbg !11 +} + +; Function Attrs: inaccessiblemem_or_argmemonly nounwind +declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, i8*, i32, i32) #2 + 
+attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"} +attributes #1 = { inaccessiblemem_or_argmemonly nounwind } +attributes #2 = { argmemonly nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) +!1 = !DIFile(filename: "prefetch.cc", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 7.0.0 (trunk 327078) (llvm/trunk 327086)"} +!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 12, column: 3, scope: !7) +!10 = !DILocation(line: 14, column: 3, scope: !7) +!11 = !DILocation(line: 15, column: 3, scope: !7) + +;CHECK-LABEL: main: +;CHECK: # %bb.0: +;CHECK: prefetchnta 291 +;CHECK-NOT: prefetchnta 42(%rax,%ymm0) +;CHECK: vgatherpf1dpd (%rax,%ymm0) {%k1} Index: llvm/trunk/test/CodeGen/X86/insert-prefetch-no-MachineMemOperand.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/insert-prefetch-no-MachineMemOperand.ll +++ llvm/trunk/test/CodeGen/X86/insert-prefetch-no-MachineMemOperand.ll @@ -1,46 +0,0 @@ -; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-nomemop.afdo | FileCheck %s -; ModuleID = 'prefetch.cc' -source_filename = "prefetch.cc" -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: norecurse nounwind uwtable -define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 { -entry: - tail call void @llvm.prefetch(i8* inttoptr (i64 291 to i8*), i32 0, i32 0, i32 1), !dbg !9 - 
tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, i8* null, i32 1, i32 2), !dbg !10 - ret i32 291, !dbg !11 -} - -; Function Attrs: inaccessiblemem_or_argmemonly nounwind -declare void @llvm.prefetch(i8* nocapture readonly, i32, i32, i32) #1 - -; Function Attrs: argmemonly nounwind -declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, i8*, i32, i32) #2 - -attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"} -attributes #1 = { inaccessiblemem_or_argmemonly nounwind } -attributes #2 = { argmemonly nounwind } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, debugInfoForProfiling: true) -!1 = !DIFile(filename: "prefetch.cc", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{i32 1, !"wchar_size", i32 4} -!6 = !{!"clang version 7.0.0 (trunk 327078) (llvm/trunk 327086)"} -!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 12, column: 3, scope: !7) -!10 = !DILocation(line: 14, column: 3, scope: !7) -!11 = !DILocation(line: 15, column: 3, scope: !7) - -;CHECK-LABEL: main: -;CHECK: # %bb.0: -;CHECK: prefetchnta 291 -;CHECK: prefetchnta 42(%rax,%ymm0) -;CHECK-NEXT: vgatherpf1dpd (%rax,%ymm0) {%k1} Index: llvm/trunk/test/CodeGen/X86/insert-prefetch-nomemop.afdo =================================================================== --- llvm/trunk/test/CodeGen/X86/insert-prefetch-nomemop.afdo +++ llvm/trunk/test/CodeGen/X86/insert-prefetch-nomemop.afdo @@ -1,2 +0,0 @@ -main:0:0 - 6: 0 __prefetch_nta_0:42 \ No newline at end of file