Skip to content

Commit 145e5b4

Browse files
author
Elena Demikhovsky
committedFeb 23, 2015
restructured X86 scalar unary operation templates
I made the templates general, so there is no need to define a pattern separately for each instruction/intrinsic. The only remaining step is to add the r_Int pattern for AVX. llvm-svn: 230221
1 parent b4f08eb commit 145e5b4

File tree

1 file changed

+118
-164
lines changed

1 file changed

+118
-164
lines changed
 

‎llvm/lib/Target/X86/X86InstrSSE.td

+118-164
Original file line numberDiff line numberDiff line change
@@ -3344,56 +3344,106 @@ def SSE_RCPS : OpndItins<
33443344
>;
33453345
}
33463346

3347-
/// sse1_fp_unop_s - SSE1 unops in scalar form
3347+
/// sse_fp_unop_s - SSE1/SSE2 unops in scalar form
33483348
/// For the non-AVX defs, we need $src1 to be tied to $dst because
33493349
/// the HW instructions are 2 operand / destructive.
3350-
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3351-
OpndItins itins> {
3352-
let Predicates = [HasAVX], hasSideEffects = 0 in {
3353-
def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
3354-
(ins FR32:$src1, FR32:$src2),
3355-
!strconcat("v", OpcodeStr,
3356-
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3357-
[]>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
3358-
let mayLoad = 1 in {
3359-
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
3360-
(ins FR32:$src1,f32mem:$src2),
3361-
!strconcat("v", OpcodeStr,
3362-
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3363-
[]>, VEX_4V, VEX_LIG,
3364-
Sched<[itins.Sched.Folded, ReadAfterLd]>;
3365-
let isCodeGenOnly = 1 in
3366-
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3367-
(ins VR128:$src1, ssmem:$src2),
3368-
!strconcat("v", OpcodeStr,
3369-
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3370-
[]>, VEX_4V, VEX_LIG,
3371-
Sched<[itins.Sched.Folded, ReadAfterLd]>;
3350+
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3351+
ValueType vt, ValueType ScalarVT,
3352+
X86MemOperand x86memop, Operand vec_memop,
3353+
ComplexPattern mem_cpat, Intrinsic Intr,
3354+
SDNode OpNode, OpndItins itins, Predicate target,
3355+
string Suffix> {
3356+
let hasSideEffects = 0 in {
3357+
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
3358+
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
3359+
[(set RC:$dst, (OpNode RC:$src1))], itins.rr>, Sched<[itins.Sched]>,
3360+
Requires<[target]>;
3361+
let mayLoad = 1 in
3362+
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1),
3363+
!strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"),
3364+
[(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm>,
3365+
Sched<[itins.Sched.Folded, ReadAfterLd]>,
3366+
Requires<[target, OptForSize]>;
3367+
3368+
let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3369+
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
3370+
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3371+
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3372+
let mayLoad = 1 in
3373+
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),
3374+
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3375+
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3376+
}
3377+
}
3378+
3379+
let Predicates = [target] in {
3380+
def : Pat<(vt (OpNode mem_cpat:$src)),
3381+
(vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)
3382+
(vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;
3383+
// These are unary operations, but they are modeled as having 2 source operands
3384+
// because the high elements of the destination are unchanged in SSE.
3385+
def : Pat<(Intr VR128:$src),
3386+
(!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
3387+
def : Pat<(Intr (load addr:$src)),
3388+
(vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
3389+
addr:$src), VR128))>;
3390+
def : Pat<(Intr mem_cpat:$src),
3391+
(!cast<Instruction>(NAME#Suffix##m_Int)
3392+
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
33723393
}
33733394
}
33743395

3375-
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
3376-
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3377-
[(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
3378-
// For scalar unary operations, fold a load into the operation
3379-
// only in OptForSize mode. It eliminates an instruction, but it also
3380-
// eliminates a whole-register clobber (the load), so it introduces a
3381-
// partial register update condition.
3382-
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
3383-
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3384-
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
3385-
Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
3386-
let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3387-
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
3388-
(ins VR128:$src1, VR128:$src2),
3389-
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
3390-
[], itins.rr>, Sched<[itins.Sched]>;
3391-
let mayLoad = 1, hasSideEffects = 0 in
3392-
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3393-
(ins VR128:$src1, ssmem:$src2),
3394-
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
3395-
[], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3396+
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
3397+
ValueType vt, ValueType ScalarVT,
3398+
X86MemOperand x86memop, Operand vec_memop,
3399+
ComplexPattern mem_cpat,
3400+
Intrinsic Intr, SDNode OpNode, OpndItins itins,
3401+
Predicate target, string Suffix> {
3402+
let hasSideEffects = 0 in {
3403+
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
3404+
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3405+
[], itins.rr>, Sched<[itins.Sched]>;
3406+
let mayLoad = 1 in
3407+
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
3408+
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3409+
[], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3410+
let isCodeGenOnly = 1 in {
3411+
// TODO: uncomment once all r_Int forms have been added to X86InstrInfo.cpp
3412+
//def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
3413+
// (ins VR128:$src1, VR128:$src2),
3414+
// !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3415+
// []>, Sched<[itins.Sched.Folded]>;
3416+
let mayLoad = 1 in
3417+
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
3418+
(ins VR128:$src1, vec_memop:$src2),
3419+
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3420+
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3421+
}
33963422
}
3423+
3424+
let Predicates = [target] in {
3425+
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
3426+
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
3427+
3428+
def : Pat<(vt (OpNode mem_cpat:$src)),
3429+
(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
3430+
mem_cpat:$src)>;
3431+
3432+
// TODO: use the r_Int form once it is ready
3433+
//def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int)
3434+
// (vt (IMPLICIT_DEF)), VR128:$src)>;
3435+
def : Pat<(Intr VR128:$src),
3436+
(vt (COPY_TO_REGCLASS(
3437+
!cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)),
3438+
(ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>;
3439+
def : Pat<(Intr mem_cpat:$src),
3440+
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
3441+
(vt (IMPLICIT_DEF)), mem_cpat:$src)>;
3442+
}
3443+
let Predicates = [target, OptForSize] in
3444+
def : Pat<(ScalarVT (OpNode (load addr:$src))),
3445+
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
3446+
addr:$src)>;
33973447
}
33983448

33993449
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -3472,57 +3522,6 @@ let Predicates = [HasAVX] in {
34723522
} // isCodeGenOnly = 1
34733523
}
34743524

3475-
/// sse2_fp_unop_s - SSE2 unops in scalar form.
3476-
// FIXME: Combine the following sse2 classes with the sse1 classes above.
3477-
// The only usage of these is for SQRT[S/P]D. See sse12_fp_binop* for example.
3478-
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
3479-
SDNode OpNode, OpndItins itins> {
3480-
let Predicates = [HasAVX], hasSideEffects = 0 in {
3481-
def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst),
3482-
(ins FR64:$src1, FR64:$src2),
3483-
!strconcat("v", OpcodeStr,
3484-
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3485-
[]>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
3486-
let mayLoad = 1 in {
3487-
def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
3488-
(ins FR64:$src1,f64mem:$src2),
3489-
!strconcat("v", OpcodeStr,
3490-
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3491-
[]>, VEX_4V, VEX_LIG,
3492-
Sched<[itins.Sched.Folded, ReadAfterLd]>;
3493-
let isCodeGenOnly = 1 in
3494-
def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
3495-
(ins VR128:$src1, sdmem:$src2),
3496-
!strconcat("v", OpcodeStr,
3497-
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3498-
[]>, VEX_4V, VEX_LIG,
3499-
Sched<[itins.Sched.Folded, ReadAfterLd]>;
3500-
}
3501-
}
3502-
3503-
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
3504-
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3505-
[(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
3506-
Sched<[itins.Sched]>;
3507-
// See the comments in sse1_fp_unop_s for why this is OptForSize.
3508-
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
3509-
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3510-
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
3511-
Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
3512-
let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
3513-
def SDr_Int :
3514-
SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
3515-
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
3516-
[], itins.rr>, Sched<[itins.Sched]>;
3517-
3518-
let mayLoad = 1, hasSideEffects = 0 in
3519-
def SDm_Int :
3520-
SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
3521-
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
3522-
[], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
3523-
} // isCodeGenOnly, Constraints
3524-
}
3525-
35263525
/// sse2_fp_unop_p - SSE2 unops in vector forms.
35273526
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
35283527
SDNode OpNode, OpndItins itins> {
@@ -3559,6 +3558,30 @@ let Predicates = [HasAVX] in {
35593558
Sched<[itins.Sched.Folded]>;
35603559
}
35613560

3561+
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3562+
OpndItins itins> {
3563+
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
3564+
ssmem, sse_load_f32,
3565+
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
3566+
itins, UseSSE1, "SS">, XS;
3567+
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
3568+
f32mem, ssmem, sse_load_f32,
3569+
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
3570+
itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG;
3571+
}
3572+
3573+
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
3574+
OpndItins itins> {
3575+
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
3576+
sdmem, sse_load_f64,
3577+
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
3578+
OpNode, itins, UseSSE2, "SD">, XD;
3579+
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
3580+
f64mem, sdmem, sse_load_f64,
3581+
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
3582+
OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG;
3583+
}
3584+
35623585
// Square root.
35633586
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
35643587
sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
@@ -3576,75 +3599,6 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
35763599
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
35773600
int_x86_avx_rcp_ps_256, SSE_RCPP>;
35783601

3579-
let Predicates = [UseAVX] in {
3580-
def : Pat<(f32 (fsqrt FR32:$src)),
3581-
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3582-
def : Pat<(f32 (fsqrt (load addr:$src))),
3583-
(VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3584-
Requires<[HasAVX, OptForSize]>;
3585-
def : Pat<(f64 (fsqrt FR64:$src)),
3586-
(VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
3587-
def : Pat<(f64 (fsqrt (load addr:$src))),
3588-
(VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
3589-
Requires<[HasAVX, OptForSize]>;
3590-
3591-
def : Pat<(f32 (X86frsqrt FR32:$src)),
3592-
(VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3593-
def : Pat<(f32 (X86frsqrt (load addr:$src))),
3594-
(VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3595-
Requires<[HasAVX, OptForSize]>;
3596-
3597-
def : Pat<(f32 (X86frcp FR32:$src)),
3598-
(VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3599-
def : Pat<(f32 (X86frcp (load addr:$src))),
3600-
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3601-
Requires<[HasAVX, OptForSize]>;
3602-
}
3603-
let Predicates = [UseAVX] in {
3604-
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3605-
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
3606-
(COPY_TO_REGCLASS VR128:$src, FR32)),
3607-
VR128)>;
3608-
def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
3609-
(VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3610-
3611-
def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
3612-
(COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
3613-
(COPY_TO_REGCLASS VR128:$src, FR64)),
3614-
VR128)>;
3615-
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
3616-
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
3617-
}
3618-
3619-
let Predicates = [HasAVX] in {
3620-
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3621-
(COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
3622-
(COPY_TO_REGCLASS VR128:$src, FR32)),
3623-
VR128)>;
3624-
def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
3625-
(VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3626-
3627-
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3628-
(COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
3629-
(COPY_TO_REGCLASS VR128:$src, FR32)),
3630-
VR128)>;
3631-
def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
3632-
(VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3633-
}
3634-
3635-
// These are unary operations, but they are modeled as having 2 source operands
3636-
// because the high elements of the destination are unchanged in SSE.
3637-
let Predicates = [UseSSE1] in {
3638-
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3639-
(RSQRTSSr_Int VR128:$src, VR128:$src)>;
3640-
def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3641-
(RCPSSr_Int VR128:$src, VR128:$src)>;
3642-
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3643-
(SQRTSSr_Int VR128:$src, VR128:$src)>;
3644-
def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
3645-
(SQRTSDr_Int VR128:$src, VR128:$src)>;
3646-
}
3647-
36483602
// There is no f64 version of the reciprocal approximation instructions.
36493603

36503604
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)
Please sign in to comment.