Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -534,6 +534,12 @@
     addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
                                                      : &X86::FR64RegClass);
 
+    // Disable f32->f64 extload as we can only generate this in one instruction
+    // under optsize. So it's easier to pattern match (fpext (load)) for that
+    // case instead of needing to emit 2 instructions for extload in the
+    // non-optsize case.
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
     for (auto VT : { MVT::f32, MVT::f64 }) {
       // Use ANDPD to simulate FABS.
       setOperationAction(ISD::FABS, VT, Custom);
Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -7521,14 +7521,6 @@
           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX512, OptForSize]>;
 
-def : Pat<(f64 (extloadf32 addr:$src)),
-          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-          Requires<[HasAVX512, OptForSize]>;
-
-def : Pat<(f64 (extloadf32 addr:$src)),
-          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
-          Requires<[HasAVX512, OptForSpeed]>;
-
 def : Pat<(f32 (fpround FR64X:$src)),
           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -1251,13 +1251,6 @@
 def : Pat<(fpextend (loadf32 addr:$src)),
           (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-          Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
-          Requires<[UseAVX, OptForSpeed]>;
-
 let isCodeGenOnly = 1 in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1265,21 +1258,11 @@
                    XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
+                   [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
                    XS, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSS2SD.Folded]>;
 } // isCodeGenOnly = 1
 
-// extload f32 -> f64. This matches load+fpextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fpextend, we have to match it
-// explicitly here.
-def : Pat<(fpextend (loadf32 addr:$src)),
-          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
-
 let hasSideEffects = 0 in {
 def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),