Remove X86 isel patterns that match X86vzmovl of a scalar_to_vector load.

This patch deletes the vzmovl_v2i64 / vzmovl_v4i32 PatFrags from
X86InstrFragmentsSIMD.td together with every pattern in X86InstrAVX512.td and
X86InstrSSE.td that uses them, and the patterns of the form
(X86vzmovl (scalar_to_vector (loadf32 | loadf64 | loadi32 addr))). In the
UseAVX block of X86InstrSSE.td it also deletes the 128-bit
(v4f32 / v2f64 (X86vzload addr)) -> VMOVSSrm / VMOVSDrm patterns. The
X86vzload-based pattern next to each removed pattern is kept as context, and
the zextloadi64i32 X86vzmovl patterns are left untouched.

NOTE(review): presumably these loads are now always turned into X86vzload
before instruction selection, which would make the removed patterns dead --
confirm against the accompanying C++ lowering change, which is not part of
this patch text.

Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td
@@ -4313,16 +4313,6 @@
 
 
 let Predicates = [HasAVX512] in {
-  // MOVSSrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (VMOVSSZrm addr:$src)>;
-
-  // MOVSDrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (VMOVSDZrm addr:$src)>;
-
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
   def : Pat<(v8f32 (X86vzload addr:$src)),
@@ -4357,8 +4347,6 @@
   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (VMOVDI2PDIZrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-            (VMOVDI2PDIZrm addr:$src)>;
   def : Pat<(v4i32 (X86vzload addr:$src)),
             (VMOVDI2PDIZrm addr:$src)>;
   def : Pat<(v8i32 (X86vzload addr:$src)),
@@ -8608,8 +8596,6 @@
                          EVEX_CD8<32, CD8VH>;
 
     // Pattern match vcvtph2ps of a scalar i64 load.
-    def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
-              (VCVTPH2PSZ128rm addr:$src)>;
     def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
               (VCVTPH2PSZ128rm addr:$src)>;
     def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
@@ -9636,8 +9622,6 @@
   let Predicates = [HasVLX, HasBWI] in {
   def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
-  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
   def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
   }
@@ -9645,15 +9629,11 @@
   let Predicates = [HasVLX] in {
   def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
-  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
   def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
 
   def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
   def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
   }
@@ -9686,8 +9666,6 @@
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
-  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
   def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
@@ -9696,8 +9674,6 @@
   let Predicates = [HasVLX] in {
   def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
@@ -9705,8 +9681,6 @@
 
   def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
@@ -9716,8 +9690,6 @@
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
-  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
   def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
@@ -9725,8 +9697,6 @@
 
   def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
@@ -9736,8 +9706,6 @@
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
-  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
   def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
@@ -9746,8 +9714,6 @@
   let Predicates = [HasVLX] in {
   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
   def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
   def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
@@ -9755,8 +9721,6 @@
 
   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
@@ -9764,8 +9728,6 @@
 
   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -939,13 +939,6 @@
 def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
 def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
 
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzmovl
-                             (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
-                           (bitconvert (v4i32 (X86vzmovl
-                             (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
-
 def vzload_v2i64 : PatFrag<(ops node:$src),
                            (bitconvert (v2i64 (X86vzload node:$src)))>;
 
Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td
@@ -263,20 +263,6 @@
 
 // Patterns
 let Predicates = [UseAVX] in {
-  // MOVSSrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (VMOVSSrm addr:$src)>;
-  def : Pat<(v4f32 (X86vzload addr:$src)),
-            (VMOVSSrm addr:$src)>;
-
-  // MOVSDrm zeros the high parts of the register; represent this
-  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (VMOVSDrm addr:$src)>;
-  def : Pat<(v2f64 (X86vzload addr:$src)),
-            (VMOVSDrm addr:$src)>;
-
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
   def : Pat<(v8f32 (X86vzload addr:$src)),
@@ -313,16 +299,6 @@
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
             (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
   }
-
-  // MOVSSrm already zeros the high parts of the register.
-  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
-            (MOVSSrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // MOVSDrm already zeros the high parts of the register.
-  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-            (MOVSDrm addr:$src)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -4135,8 +4111,6 @@
   // These instructions also write zeros in the high part of a 256-bit register.
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (VMOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-            (VMOVDI2PDIrm addr:$src)>;
   def : Pat<(v4i32 (X86vzload addr:$src)),
             (VMOVDI2PDIrm addr:$src)>;
   def : Pat<(v8i32 (X86vzload addr:$src)),
@@ -4151,8 +4125,6 @@
             (MOV64toPQIrr GR64:$src)>;
   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
             (MOVDI2PDIrm addr:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-            (MOVDI2PDIrm addr:$src)>;
   def : Pat<(v4i32 (X86vzload addr:$src)),
             (MOVDI2PDIrm addr:$src)>;
 }
@@ -4944,8 +4916,6 @@
 
   def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
             (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
-  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
   def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
   }
@@ -4969,15 +4939,11 @@
   let Predicates = [HasAVX2, NoVLX] in {
   def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
             (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
-  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
   def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
 
   def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
-  def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
   def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
   def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
@@ -4985,15 +4951,11 @@
 
   def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
             (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
-  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
   def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
 
   def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
@@ -5001,8 +4963,6 @@
 
   def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
-  def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
   def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
@@ -5057,8 +5017,6 @@
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
-  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
   def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))),
@@ -5067,8 +5025,6 @@
   let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))),
@@ -5076,8 +5032,6 @@
 
   def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
             (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv16i8 addr:$src))),
@@ -5087,8 +5041,6 @@
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
-  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
   def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))),
@@ -5096,8 +5048,6 @@
 
   def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
-            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))),
@@ -5107,8 +5057,6 @@
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
-  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
-            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
   def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))),
@@ -7314,8 +7262,6 @@
                                WriteCvtPS2PHYSt>, VEX_L;
 
   // Pattern match vcvtph2ps of a scalar i64 load.
-  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
-            (VCVTPH2PSrm addr:$src)>;
   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
             (VCVTPH2PSrm addr:$src)>;
   def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert