@@ -3344,56 +3344,106 @@ def SSE_RCPS : OpndItins<
3344
3344
>;
3345
3345
}
3346
3346
3347
- /// sse1_fp_unop_s - SSE1 unops in scalar form
3347
+ /// sse_fp_unop_s - SSE1/SSE2 unops in scalar form (non-AVX encodings)
3348
3348
/// For the non-AVX defs, we need $src1 to be tied to $dst because
3349
3349
/// the HW instructions are 2 operand / destructive.
3350
- multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3351
- OpndItins itins> {
3352
- let Predicates = [HasAVX], hasSideEffects = 0 in {
3353
- def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
3354
- (ins FR32:$src1, FR32:$src2),
3355
- !strconcat("v", OpcodeStr,
3356
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3357
- []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
3358
- let mayLoad = 1 in {
3359
- def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
3360
- (ins FR32:$src1,f32mem:$src2),
3361
- !strconcat("v", OpcodeStr,
3362
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3363
- []>, VEX_4V, VEX_LIG,
3364
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
3365
- let isCodeGenOnly = 1 in
3366
- def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3367
- (ins VR128:$src1, ssmem:$src2),
3368
- !strconcat("v", OpcodeStr,
3369
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3370
- []>, VEX_4V, VEX_LIG,
3371
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
3350
+ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3351
+ ValueType vt, ValueType ScalarVT,
3352
+ X86MemOperand x86memop, Operand vec_memop,
3353
+ ComplexPattern mem_cpat, Intrinsic Intr,
3354
+ SDNode OpNode, OpndItins itins, Predicate target,
3355
+ string Suffix> {
3356
+ let hasSideEffects = 0 in {
3357
+ // Scalar register form: selected directly for OpNode on RC.
+ def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
3358
+ !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
3359
+ [(set RC:$dst, (OpNode RC:$src1))], itins.rr>, Sched<[itins.Sched]>,
3360
+ Requires<[target]>;
3361
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ let mayLoad = 1 in
3362
+ def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
3363
+ !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
3364
+ [(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm>,
3365
+ Sched<[itins.Sched.Folded, ReadAfterLd]>,
3366
+ Requires<[target, OptForSize]>;
3367
+
3368
+ // Intrinsic (VR128) forms. $src1 is tied to $dst, so only $src2 is printed.
+ let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3369
+ // Register-register form: it has no memory operand, so it takes the plain
+ // scheduling class and the rr itinerary (same as def r), not the folded-load
+ // class used by m_Int below.
+ def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
3370
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3371
+ [], itins.rr>, Sched<[itins.Sched]>;
3372
+ let mayLoad = 1 in
3373
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),
3374
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3375
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3376
+ }
3377
+ }
3378
+
3379
+ let Predicates = [target] in {
3380
+ // OpNode on a mem_cpat operand goes through m_Int with an undefined tied
+ // source; the result is then copied to RC.
+ def : Pat<(vt (OpNode mem_cpat:$src)),
3381
+ (vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)
3382
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;
3383
+ // These are unary operations, but they are modeled as having 2 source operands
3384
+ // because the high elements of the destination are unchanged in SSE.
3385
+ def : Pat<(Intr VR128:$src),
3386
+ (!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
3387
+ // NOTE(review): this selects the scalar m form under [target] only, while
+ // def m itself carries Requires<[target, OptForSize]> - confirm that folding
+ // the load here without the OptForSize guard is intended.
+ def : Pat<(Intr (load addr:$src)),
3388
+ (vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
3389
+ addr:$src), VR128))>;
3390
+ def : Pat<(Intr mem_cpat:$src),
3391
+ (!cast<Instruction>(NAME#Suffix##m_Int)
3392
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
3372
3393
}
3373
3394
}
3374
3395
3375
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
3376
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3377
- [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
3378
- // For scalar unary operations, fold a load into the operation
3379
- // only in OptForSize mode. It eliminates an instruction, but it also
3380
- // eliminates a whole-register clobber (the load), so it introduces a
3381
- // partial register update condition.
3382
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
3383
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3384
- [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
3385
- Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
3386
- let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3387
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
3388
- (ins VR128:$src1, VR128:$src2),
3389
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
3390
- [], itins.rr>, Sched<[itins.Sched]>;
3391
- let mayLoad = 1, hasSideEffects = 0 in
3392
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3393
- (ins VR128:$src1, ssmem:$src2),
3394
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
3395
- [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3396
+ /// avx_fp_unop_s - scalar unops in their three-operand AVX form.
+ /// The defs below carry no selection patterns; selection is done by the
+ /// Pat<> records at the end, which pass IMPLICIT_DEF as the first source.
+ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3397
+ ValueType vt, ValueType ScalarVT,
3398
+ X86MemOperand x86memop, Operand vec_memop,
3399
+ ComplexPattern mem_cpat,
3400
+ Intrinsic Intr, SDNode OpNode, OpndItins itins,
3401
+ Predicate target, string Suffix> {
3402
+ let hasSideEffects = 0 in {
3403
+ // Scalar register form on RC.
+ def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
3404
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3405
+ [], itins.rr>, Sched<[itins.Sched]>;
3406
+ // Scalar load-folding form on RC.
+ let mayLoad = 1 in
3407
+ def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
3408
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3409
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3410
+ let isCodeGenOnly = 1 in {
3411
+ // TODO: uncomment once all r_Int forms have been added to X86InstrInfo.cpp.
3412
+ //def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
3413
+ // (ins VR128:$src1, VR128:$src2),
3414
+ // !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3415
+ // []>, Sched<[itins.Sched.Folded]>;
3416
+ // Intrinsic (VR128) load-folding form.
+ let mayLoad = 1 in
3417
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
3418
+ (ins VR128:$src1, vec_memop:$src2),
3419
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3420
+ []>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3421
+ }
3396
3422
}
3423
+
3424
+ let Predicates = [target] in {
3425
+ // Scalar OpNode on a register: r form with an undefined first source.
+ def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
3426
+ (ScalarVT (IMPLICIT_DEF)), RC:$src)>;
3427
+
3428
+ def : Pat<(vt (OpNode mem_cpat:$src)),
3429
+ (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
3430
+ mem_cpat:$src)>;
3431
+
3432
+ // TODO: use the r_Int form once it is available.
3433
+ //def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int)
3434
+ // (vt (IMPLICIT_DEF)), VR128:$src)>;
+ // Until then, the register intrinsic goes through the scalar r form:
+ // VR128 is copied to RC for the operation and the result copied back.
3435
+ def : Pat<(Intr VR128:$src),
3436
+ (vt (COPY_TO_REGCLASS(
3437
+ !cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),
3438
+ (ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>;
3439
+ def : Pat<(Intr mem_cpat:$src),
3440
+ (!cast<Instruction>("V"#NAME#Suffix##m_Int)
3441
+ (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
3442
+ }
3443
+ // The plain scalar load-folding form is only selected under OptForSize.
+ let Predicates = [target, OptForSize] in
3444
+ def : Pat<(ScalarVT (OpNode (load addr:$src))),
3445
+ (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
3446
+ addr:$src)>;
3397
3447
}
3398
3448
3399
3449
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -3472,57 +3522,6 @@ let Predicates = [HasAVX] in {
3472
3522
} // isCodeGenOnly = 1
3473
3523
}
3474
3524
3475
- /// sse2_fp_unop_s - SSE2 unops in scalar form.
3476
- // FIXME: Combine the following sse2 classes with the sse1 classes above.
3477
- // The only usage of these is for SQRT[S/P]D. See sse12_fp_binop* for example.
3478
- multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
3479
- SDNode OpNode, OpndItins itins> {
3480
- let Predicates = [HasAVX], hasSideEffects = 0 in {
3481
- def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
3482
- (ins FR64:$src1, FR64:$src2),
3483
- !strconcat("v", OpcodeStr,
3484
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3485
- []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
3486
- let mayLoad = 1 in {
3487
- def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
3488
- (ins FR64:$src1,f64mem:$src2),
3489
- !strconcat("v", OpcodeStr,
3490
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3491
- []>, VEX_4V, VEX_LIG,
3492
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
3493
- let isCodeGenOnly = 1 in
3494
- def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
3495
- (ins VR128:$src1, sdmem:$src2),
3496
- !strconcat("v", OpcodeStr,
3497
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3498
- []>, VEX_4V, VEX_LIG,
3499
- Sched<[itins.Sched.Folded, ReadAfterLd]>;
3500
- }
3501
- }
3502
-
3503
- def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
3504
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3505
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
3506
- Sched<[itins.Sched]>;
3507
- // See the comments in sse1_fp_unop_s for why this is OptForSize.
3508
- def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
3509
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3510
- [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
3511
- Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
3512
- let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3513
- def SDr_Int :
3514
- SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
3515
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
3516
- [], itins.rr>, Sched<[itins.Sched]>;
3517
-
3518
- let mayLoad = 1, hasSideEffects = 0 in
3519
- def SDm_Int :
3520
- SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
3521
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
3522
- [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3523
- } // isCodeGenOnly, Constraints
3524
- }
3525
-
3526
3525
/// sse2_fp_unop_p - SSE2 unops in vector forms.
3527
3526
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
3528
3527
SDNode OpNode, OpndItins itins> {
@@ -3559,6 +3558,30 @@ let Predicates = [HasAVX] in {
3559
3558
Sched<[itins.Sched.Folded]>;
3560
3559
}
3561
3560
3561
+ /// sse1_fp_unop_s - instantiates the "ss" scalar unop forms on FR32/v4f32:
+ /// sse_fp_unop_s under UseSSE1 and avx_fp_unop_s under HasAVX, both bound to
+ /// the int_x86_sse_<OpcodeStr>_ss intrinsic.
+ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3562
+ OpndItins itins> {
3563
+ defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
3564
+ ssmem, sse_load_f32,
3565
+ !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
3566
+ itins, UseSSE1, "SS">, XS;
3567
+ // Same opcode with a "v" mnemonic prefix and the VEX_4V/VEX_LIG modifiers.
+ defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
3568
+ f32mem, ssmem, sse_load_f32,
3569
+ !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
3570
+ itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG;
3571
+ }
3572
+
3573
+ /// sse2_fp_unop_s - instantiates the "sd" scalar unop forms on FR64/v2f64:
+ /// sse_fp_unop_s under UseSSE2 and avx_fp_unop_s under HasAVX, both bound to
+ /// the int_x86_sse2_<OpcodeStr>_sd intrinsic.
+ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3574
+ OpndItins itins> {
3575
+ defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
3576
+ sdmem, sse_load_f64,
3577
+ !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
3578
+ OpNode, itins, UseSSE2, "SD">, XD;
3579
+ // Same opcode with a "v" mnemonic prefix and the VEX_4V/VEX_LIG modifiers.
+ defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
3580
+ f64mem, sdmem, sse_load_f64,
3581
+ !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
3582
+ OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG;
3583
+ }
3584
+
3562
3585
// Square root.
3563
3586
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
3564
3587
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
@@ -3576,75 +3599,6 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
3576
3599
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
3577
3600
int_x86_avx_rcp_ps_256, SSE_RCPP>;
3578
3601
3579
- let Predicates = [UseAVX] in {
3580
- def : Pat<(f32 (fsqrt FR32:$src)),
3581
- (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3582
- def : Pat<(f32 (fsqrt (load addr:$src))),
3583
- (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3584
- Requires<[HasAVX, OptForSize]>;
3585
- def : Pat<(f64 (fsqrt FR64:$src)),
3586
- (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
3587
- def : Pat<(f64 (fsqrt (load addr:$src))),
3588
- (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
3589
- Requires<[HasAVX, OptForSize]>;
3590
-
3591
- def : Pat<(f32 (X86frsqrt FR32:$src)),
3592
- (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3593
- def : Pat<(f32 (X86frsqrt (load addr:$src))),
3594
- (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3595
- Requires<[HasAVX, OptForSize]>;
3596
-
3597
- def : Pat<(f32 (X86frcp FR32:$src)),
3598
- (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3599
- def : Pat<(f32 (X86frcp (load addr:$src))),
3600
- (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3601
- Requires<[HasAVX, OptForSize]>;
3602
- }
3603
- let Predicates = [UseAVX] in {
3604
- def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3605
- (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
3606
- (COPY_TO_REGCLASS VR128:$src, FR32)),
3607
- VR128)>;
3608
- def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
3609
- (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3610
-
3611
- def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
3612
- (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
3613
- (COPY_TO_REGCLASS VR128:$src, FR64)),
3614
- VR128)>;
3615
- def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
3616
- (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
3617
- }
3618
-
3619
- let Predicates = [HasAVX] in {
3620
- def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3621
- (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
3622
- (COPY_TO_REGCLASS VR128:$src, FR32)),
3623
- VR128)>;
3624
- def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
3625
- (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3626
-
3627
- def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3628
- (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
3629
- (COPY_TO_REGCLASS VR128:$src, FR32)),
3630
- VR128)>;
3631
- def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
3632
- (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3633
- }
3634
-
3635
- // These are unary operations, but they are modeled as having 2 source operands
3636
- // because the high elements of the destination are unchanged in SSE.
3637
- let Predicates = [UseSSE1] in {
3638
- def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3639
- (RSQRTSSr_Int VR128:$src, VR128:$src)>;
3640
- def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3641
- (RCPSSr_Int VR128:$src, VR128:$src)>;
3642
- def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3643
- (SQRTSSr_Int VR128:$src, VR128:$src)>;
3644
- def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
3645
- (SQRTSDr_Int VR128:$src, VR128:$src)>;
3646
- }
3647
-
3648
3602
// There is no f64 version of the reciprocal approximation instructions.
3649
3603
3650
3604
//===----------------------------------------------------------------------===//
0 commit comments