Make the PowerPC VSX FMA mutation respect register-class constraints.

Before the uses of OldFMAReg are rewritten to KilledProdReg, constrain
KilledProdReg to OldFMAReg's register class and skip the transform when
MRI.constrainRegClass reports that no class fits. The uses are rewritten with
MachineOperand::substVirtReg (which takes TRI) instead of separate
setReg/setSubReg calls.

The new test runs llc with -verify-machineinstrs so the machine verifier
inspects the rewritten operands as well; the CHECK-NOT rejects any
vmrghw/vmrglw line that names a register numbered 32-69.

NOTE(review): in this copy of the test every shufflevector ends at its mask
type (e.g. "<16 x i32>") with no mask constant. The masks appear to have been
dropped in transit and must be restored from the original bugpoint reduction
before this can be committed -- TODO confirm against the source of the patch.

Index: lib/Target/PowerPC/PPCVSXFMAMutate.cpp
===================================================================
--- lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -220,6 +220,11 @@
         if (OldFMAReg == KilledProdReg)
           continue;
 
+        // If there isn't a class that fits, we can't perform the transform.
+        if (!MRI.constrainRegClass(KilledProdReg,
+                                   MRI.getRegClass(OldFMAReg)))
+          continue;
+
         assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
                "Addend copy not tied to old FMA output!");
 
@@ -262,8 +267,7 @@
           if (UseMI == AddendMI)
             continue;
 
-          UseMO.setReg(KilledProdReg);
-          UseMO.setSubReg(KilledProdSubReg);
+          UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
         }
 
         // Extend the live intervals of the killed product operand to hold the
Index: test/CodeGen/PowerPC/fma-mutate-register-constraint.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/fma-mutate-register-constraint.ll
@@ -0,0 +1,264 @@
+; RUN: llc -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; CHECK-NOT: {{vmrg[hl]w.*(3[23456789]|[456][0-9])}}
+define void @__f0() {
+entry:
+  %0 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32>
+  %1 = shufflevector <16 x float> %0, <16 x float> undef, <32 x i32>
+  %2 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32>
+  %3 = shufflevector <16 x float> %2, <16 x float> undef, <32 x i32>
+  %4 = fmul <32 x float> %1, %3
+  %5 = load <4 x float>, <4 x float>* undef, align 128, !tbaa !1
+  %6 = load <4 x float>, <4 x float>* undef, align 128, !tbaa !13
+  %7 = shufflevector <4 x float> undef, <4 x float> %5, <8 x i32>
+  %8 = shufflevector <4 x float> undef, <4 x float> %6, <8 x i32>
+  %9 = shufflevector <8 x float> %7, <8 x float> %8, <16 x i32>
+  %10 = shufflevector <16 x float> undef, <16 x float> %9, <32 x i32>
+  %11 = load <4 x float>, <4 x float>* null, align 128, !tbaa !16
+  %12 = load <4 x float>, <4 x float>* undef, align 128, !tbaa !27
+  %13 = shufflevector <4 x float> undef, <4 x float> %11, <8 x i32>
+  %14 = shufflevector <4 x float> undef, <4 x float> %12, <8 x i32>
+  %15 = shufflevector <8 x float> %13, <8 x float> %14, <16 x i32>
+  %16 = shufflevector <16 x float> undef, <16 x float> %15, <32 x i32>
+  %17 = fmul <32 x float> %10, %16
+  %18 = fsub <32 x float> %4, %17
+  %19 = shufflevector <32 x float> %18, <32 x float> undef, <64 x i32>
+  %20 = bitcast <64 x float> %19 to <32 x double>
+  %21 = shufflevector <32 x double> undef, <32 x double> %20, <64 x i32>
+  %22 = bitcast <64 x double> %21 to <128 x float>
+  %23 = shufflevector <128 x float> undef, <128 x float> %22, <256 x i32>
+  %24 = shufflevector <256 x float> undef, <256 x float> %23, <512 x i32>
+  %25 = shufflevector <512 x float> %24, <512 x float> undef, <1024 x i32>
+  %26 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %26, <4 x float>* undef, align 128, !tbaa !30
+  %27 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %27, <4 x float>* undef, align 128, !tbaa !41
+  %28 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %28, <4 x float>* undef, align 128, !tbaa !46
+  %29 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %29, <4 x float>* undef, align 128, !tbaa !53
+  %30 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %30, <4 x float>* undef, align 128, !tbaa !56
+  %31 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %31, <4 x float>* undef, align 128, !tbaa !58
+  %32 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %32, <4 x float>* undef, align 128, !tbaa !62
+  %33 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %33, <4 x float>* undef, align 128, !tbaa !70
+  %34 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %34, <4 x float>* undef, align 128, !tbaa !76
+  %35 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %35, <4 x float>* undef, align 128, !tbaa !78
+  %36 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %36, <4 x float>* undef, align 128, !tbaa !87
+  %37 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %37, <4 x float>* undef, align 128, !tbaa !91
+  %38 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %38, <4 x float>* undef, align 128, !tbaa !98
+  %39 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %39, <4 x float>* undef, align 128, !tbaa !100
+  %40 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %40, <4 x float>* undef, align 128, !tbaa !104
+  %41 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %41, <4 x float>* undef, align 128, !tbaa !110
+  %42 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %42, <4 x float>* undef, align 128, !tbaa !115
+  %43 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %43, <4 x float>* undef, align 128, !tbaa !117
+  %44 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %44, <4 x float>* undef, align 128, !tbaa !125
+  %45 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %45, <4 x float>* undef, align 128, !tbaa !132
+  %46 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32>
+  store <4 x float> %46, <4 x float>* undef, align 128, !tbaa !137
+  %47 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32>
+  store <4 x float> %47, <4 x float>* undef, align 128, !tbaa !139
+  %48 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32>
+  store <4 x float> %48, <4 x float>* undef, align 128, !tbaa !150
+  %49 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32>
+  store <4 x float> %49, <4 x float>* undef, align 128, !tbaa !152
+  %50 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32>
+  store <4 x float> %50, <4 x float>* undef, align 128, !tbaa !156
+  %51 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32>
+  store <4 x float> %51, <4 x float>* undef, align 128, !tbaa !158
+  %52 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32>
+  store <4 x float> %52, <4 x float>* undef, align 128, !tbaa !165
+  ret void
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"x_S8_R4$2.1.width4.base724", !3}
+!3 = !{!"x_S8_R4$2.1.width8.base720", !4}
+!4 = !{!"x_S8_R4$2.1.width16.base720", !5}
+!5 = !{!"x_S8_R4$2.1.width32.base704", !6}
+!6 = !{!"x_S8_R4$2.1.width64.base704", !7}
+!7 = !{!"x_S8_R4$2.1.width128.base640", !8}
+!8 = !{!"x_S8_R4$2.1.width256.base512", !9}
+!9 = !{!"x_S8_R4$2.1.width512.base512", !10}
+!10 = !{!"x_S8_R4$2.1.width1024.base0", !11}
+!11 = !{!"x_S8_R4$2.1", !12}
+!12 = !{!"Halide buffer"}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"x_S8_R4$2.1.width4.base732", !15}
+!15 = !{!"x_S8_R4$2.1.width8.base728", !4}
+!16 = !{!17, !17, i64 0}
+!17 = !{!"x_S8_R4$4.1.width4.base724", !18}
+!18 = !{!"x_S8_R4$4.1.width8.base720", !19}
+!19 = !{!"x_S8_R4$4.1.width16.base720", !20}
+!20 = !{!"x_S8_R4$4.1.width32.base704", !21}
+!21 = !{!"x_S8_R4$4.1.width64.base704", !22}
+!22 = !{!"x_S8_R4$4.1.width128.base640", !23}
+!23 = !{!"x_S8_R4$4.1.width256.base512", !24}
+!24 = !{!"x_S8_R4$4.1.width512.base512", !25}
+!25 = !{!"x_S8_R4$4.1.width1024.base0", !26}
+!26 = !{!"x_S8_R4$4.1", !12}
+!27 = !{!28, !28, i64 0}
+!28 = !{!"x_S8_R4$4.1.width4.base732", !29}
+!29 = !{!"x_S8_R4$4.1.width8.base728", !19}
+!30 = !{!31, !31, i64 0}
+!31 = !{!"f16.0.width4.base0", !32}
+!32 = !{!"f16.0.width8.base0", !33}
+!33 = !{!"f16.0.width16.base0", !34}
+!34 = !{!"f16.0.width32.base0", !35}
+!35 = !{!"f16.0.width64.base0", !36}
+!36 = !{!"f16.0.width128.base0", !37}
+!37 = !{!"f16.0.width256.base0", !38}
+!38 = !{!"f16.0.width512.base0", !39}
+!39 = !{!"f16.0.width1024.base0", !40}
+!40 = !{!"f16.0", !12}
+!41 = !{!42, !42, i64 0}
+!42 = !{!"f16.0.width4.base56", !43}
+!43 = !{!"f16.0.width8.base56", !44}
+!44 = !{!"f16.0.width16.base48", !45}
+!45 = !{!"f16.0.width32.base32", !35}
+!46 = !{!47, !47, i64 0}
+!47 = !{!"f16.0.width4.base164", !48}
+!48 = !{!"f16.0.width8.base160", !49}
+!49 = !{!"f16.0.width16.base160", !50}
+!50 = !{!"f16.0.width32.base160", !51}
+!51 = !{!"f16.0.width64.base128", !52}
+!52 = !{!"f16.0.width128.base128", !37}
+!53 = !{!54, !54, i64 0}
+!54 = !{!"f16.0.width4.base168", !55}
+!55 = !{!"f16.0.width8.base168", !49}
+!56 = !{!57, !57, i64 0}
+!57 = !{!"f16.0.width4.base172", !55}
+!58 = !{!59, !59, i64 0}
+!59 = !{!"f16.0.width4.base176", !60}
+!60 = !{!"f16.0.width8.base176", !61}
+!61 = !{!"f16.0.width16.base176", !50}
+!62 = !{!63, !63, i64 0}
+!63 = !{!"f16.0.width4.base284", !64}
+!64 = !{!"f16.0.width8.base280", !65}
+!65 = !{!"f16.0.width16.base272", !66}
+!66 = !{!"f16.0.width32.base256", !67}
+!67 = !{!"f16.0.width64.base256", !68}
+!68 = !{!"f16.0.width128.base256", !69}
+!69 = !{!"f16.0.width256.base256", !38}
+!70 = !{!71, !71, i64 0}
+!71 = !{!"f16.0.width4.base328", !72}
+!72 = !{!"f16.0.width8.base328", !73}
+!73 = !{!"f16.0.width16.base320", !74}
+!74 = !{!"f16.0.width32.base320", !75}
+!75 = !{!"f16.0.width64.base320", !68}
+!76 = !{!77, !77, i64 0}
+!77 = !{!"f16.0.width4.base332", !72}
+!78 = !{!79, !79, i64 0}
+!79 = !{!"f16.0.width4.base524", !80}
+!80 = !{!"f16.0.width8.base520", !81}
+!81 = !{!"f16.0.width16.base512", !82}
+!82 = !{!"f16.0.width32.base512", !83}
+!83 = !{!"f16.0.width64.base512", !84}
+!84 = !{!"f16.0.width128.base512", !85}
+!85 = !{!"f16.0.width256.base512", !86}
+!86 = !{!"f16.0.width512.base512", !39}
+!87 = !{!88, !88, i64 0}
+!88 = !{!"f16.0.width4.base528", !89}
+!89 = !{!"f16.0.width8.base528", !90}
+!90 = !{!"f16.0.width16.base528", !82}
+!91 = !{!92, !92, i64 0}
+!92 = !{!"f16.0.width4.base648", !93}
+!93 = !{!"f16.0.width8.base648", !94}
+!94 = !{!"f16.0.width16.base640", !95}
+!95 = !{!"f16.0.width32.base640", !96}
+!96 = !{!"f16.0.width64.base640", !97}
+!97 = !{!"f16.0.width128.base640", !85}
+!98 = !{!99, !99, i64 0}
+!99 = !{!"f16.0.width4.base652", !93}
+!100 = !{!101, !101, i64 0}
+!101 = !{!"f16.0.width4.base656", !102}
+!102 = !{!"f16.0.width8.base656", !103}
+!103 = !{!"f16.0.width16.base656", !95}
+!104 = !{!105, !105, i64 0}
+!105 = !{!"f16.0.width4.base732", !106}
+!106 = !{!"f16.0.width8.base728", !107}
+!107 = !{!"f16.0.width16.base720", !108}
+!108 = !{!"f16.0.width32.base704", !109}
+!109 = !{!"f16.0.width64.base704", !97}
+!110 = !{!111, !111, i64 0}
+!111 = !{!"f16.0.width4.base736", !112}
+!112 = !{!"f16.0.width8.base736", !113}
+!113 = !{!"f16.0.width16.base736", !114}
+!114 = !{!"f16.0.width32.base736", !109}
+!115 = !{!116, !116, i64 0}
+!116 = !{!"f16.0.width4.base740", !112}
+!117 = !{!118, !118, i64 0}
+!118 = !{!"f16.0.width4.base872", !119}
+!119 = !{!"f16.0.width8.base872", !120}
+!120 = !{!"f16.0.width16.base864", !121}
+!121 = !{!"f16.0.width32.base864", !122}
+!122 = !{!"f16.0.width64.base832", !123}
+!123 = !{!"f16.0.width128.base768", !124}
+!124 = !{!"f16.0.width256.base768", !86}
+!125 = !{!126, !126, i64 0}
+!126 = !{!"f16.0.width4.base968", !127}
+!127 = !{!"f16.0.width8.base968", !128}
+!128 = !{!"f16.0.width16.base960", !129}
+!129 = !{!"f16.0.width32.base960", !130}
+!130 = !{!"f16.0.width64.base960", !131}
+!131 = !{!"f16.0.width128.base896", !124}
+!132 = !{!133, !133, i64 0}
+!133 = !{!"f16.0.width4.base1016", !134}
+!134 = !{!"f16.0.width8.base1016", !135}
+!135 = !{!"f16.0.width16.base1008", !136}
+!136 = !{!"f16.0.width32.base992", !130}
+!137 = !{!138, !138, i64 0}
+!138 = !{!"f16.0.width4.base1020", !134}
+!139 = !{!140, !140, i64 0}
+!140 = !{!"f16.1.width4.base8", !141}
+!141 = !{!"f16.1.width8.base8", !142}
+!142 = !{!"f16.1.width16.base0", !143}
+!143 = !{!"f16.1.width32.base0", !144}
+!144 = !{!"f16.1.width64.base0", !145}
+!145 = !{!"f16.1.width128.base0", !146}
+!146 = !{!"f16.1.width256.base0", !147}
+!147 = !{!"f16.1.width512.base0", !148}
+!148 = !{!"f16.1.width1024.base0", !149}
+!149 = !{!"f16.1", !12}
+!150 = !{!151, !151, i64 0}
+!151 = !{!"f16.1.width4.base12", !141}
+!152 = !{!153, !153, i64 0}
+!153 = !{!"f16.1.width4.base16", !154}
+!154 = !{!"f16.1.width8.base16", !155}
+!155 = !{!"f16.1.width16.base16", !143}
+!156 = !{!157, !157, i64 0}
+!157 = !{!"f16.1.width4.base20", !154}
+!158 = !{!159, !159, i64 0}
+!159 = !{!"f16.1.width4.base148", !160}
+!160 = !{!"f16.1.width8.base144", !161}
+!161 = !{!"f16.1.width16.base144", !162}
+!162 = !{!"f16.1.width32.base128", !163}
+!163 = !{!"f16.1.width64.base128", !164}
+!164 = !{!"f16.1.width128.base128", !146}
+!165 = !{!166, !166, i64 0}
+!166 = !{!"f16.1.width4.base632", !167}
+!167 = !{!"f16.1.width8.base632", !168}
+!168 = !{!"f16.1.width16.base624", !169}
+!169 = !{!"f16.1.width32.base608", !170}
+!170 = !{!"f16.1.width64.base576", !171}
+!171 = !{!"f16.1.width128.base512", !172}
+!172 = !{!"f16.1.width256.base512", !173}
+!173 = !{!"f16.1.width512.base512", !148}