Index: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td +++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -162,130 +162,146 @@ // Some Common Instruction Class Templates //===----------------------------------------------------------------------===// +// Template for instructions which take three int64, int32, or int16 args. +// The instructions are named "" (e.g. "add.s64"). multiclass I3 { - def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - Int64Regs:$b))]>; - def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - Int16Regs:$b))]>; - def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, 
imm:$b))]>; + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i16rr : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def i16ri : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; } +// Template for instructions which take 3 int32 args. The instructions are +// named ".s32" (e.g. "addc.cc.s32"). multiclass ADD_SUB_INT_32 { - def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; } +// Template for instructions which take three fp64 or fp32 args. The +// instructions are named ".f" (e.g. "add.f64"). +// +// Also defines ftz (flush subnormal inputs and results to sign-preserving +// zero) variants for fp32 functions. 
multiclass F3 { - def f64rr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, - (OpNode Float64Regs:$a, Float64Regs:$b))]>, - Requires<[allowFMA]>; - def f64ri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, - (OpNode Float64Regs:$a, fpimm:$b))]>, - Requires<[allowFMA]>; - def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[allowFMA, doF32FTZ]>; - def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[allowFMA, doF32FTZ]>; - def f32rr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[allowFMA]>; - def f32ri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[allowFMA]>; + def f64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, + Requires<[allowFMA]>; + def f64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, + Requires<[allowFMA]>; + def f32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, 
$b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[allowFMA, doF32FTZ]>; + def f32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[allowFMA, doF32FTZ]>; + def f32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[allowFMA]>; + def f32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[allowFMA]>; } +// Same as F3, but defines ".rn" variants (round to nearest even). multiclass F3_rn { - def f64rr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, - (OpNode Float64Regs:$a, Float64Regs:$b))]>, - Requires<[noFMA]>; - def f64ri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, - (OpNode Float64Regs:$a, fpimm:$b))]>, - Requires<[noFMA]>; - def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[noFMA, doF32FTZ]>; - def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[noFMA, doF32FTZ]>; - def f32rr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - 
(OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[noFMA]>; - def f32ri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, - (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[noFMA]>; -} - + def f64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, + Requires<[noFMA]>; + def f64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, + Requires<[noFMA]>; + def f32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[noFMA, doF32FTZ]>; + def f32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[noFMA, doF32FTZ]>; + def f32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[noFMA]>; + def f32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[noFMA]>; +} + +// Template for operations which take two f32 or f64 operands. Provides three +// instructions: .f64, .f32, and .ftz.f32 (flush +// subnormal inputs and results to zero). 
multiclass F2 { - def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), - !strconcat(OpcStr, ".f64 \t$dst, $a;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; + def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), + !strconcat(OpcStr, ".f64 \t$dst, $a;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, - Requires<[doF32FTZ]>; - def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), - !strconcat(OpcStr, ".f32 \t$dst, $a;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, + Requires<[doF32FTZ]>; + def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; } //===----------------------------------------------------------------------===// @@ -293,160 +309,239 @@ //===----------------------------------------------------------------------===// //----------------------------------- -// General Type Conversion +// Type Conversion //----------------------------------- let hasSideEffects = 0 in { -// Generate a cvt to the given type from all possible types. -// Each instance takes a CvtMode immediate that defines the conversion mode to -// use. It can be CvtNONE to omit a conversion mode. 
-multiclass CVT_FROM_ALL { - def _s16 : NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s16\t$dst, $src;"), - []>; - def _u16 : NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u16\t$dst, $src;"), - []>; - def _f16 : NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".f16\t$dst, $src;"), - []>; - def _s32 : NVPTXInst<(outs RC:$dst), - (ins Int32Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s32\t$dst, $src;"), - []>; - def _u32 : NVPTXInst<(outs RC:$dst), - (ins Int32Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u32\t$dst, $src;"), - []>; - def _s64 : NVPTXInst<(outs RC:$dst), - (ins Int64Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s64\t$dst, $src;"), - []>; - def _u64 : NVPTXInst<(outs RC:$dst), - (ins Int64Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u64\t$dst, $src;"), - []>; - def _f32 : NVPTXInst<(outs RC:$dst), - (ins Float32Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".f32\t$dst, $src;"), - []>; - def _f64 : NVPTXInst<(outs RC:$dst), - (ins Float64Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".f64\t$dst, $src;"), - []>; -} - -// Generate a cvt to all possible types. 
-defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; -defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; -defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; -defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; -defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; -defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; -defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; -defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; -defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; - -// This set of cvt is different from the above. The type of the source -// and target are the same. -// -def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.s16.s8 \t$dst, $src;", []>; -def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "cvt.s32.s8 \t$dst, $src;", []>; -def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "cvt.s32.s16 \t$dst, $src;", []>; -def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "cvt.s64.s8 \t$dst, $src;", []>; -def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "cvt.s64.s16 \t$dst, $src;", []>; -def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "cvt.s64.s32 \t$dst, $src;", []>; + // Generate a cvt to the given type from all possible types. Each instance + // takes a CvtMode immediate that defines the conversion mode to use. It can + // be CvtNONE to omit a conversion mode. 
+ multiclass CVT_FROM_ALL { + def _s16 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s16\t$dst, $src;"), []>; + def _u16 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u16\t$dst, $src;"), []>; + def _f16 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f16\t$dst, $src;"), []>; + def _s32 : + NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s32\t$dst, $src;"), []>; + def _u32 : + NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u32\t$dst, $src;"), []>; + def _s64 : + NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s64\t$dst, $src;"), []>; + def _u64 : + NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u64\t$dst, $src;"), []>; + def _f32 : + NVPTXInst<(outs RC:$dst), + (ins Float32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f32\t$dst, $src;"), []>; + def _f64 : + NVPTXInst<(outs RC:$dst), + (ins Float64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f64\t$dst, $src;"), []>; + } + + // Generate cvts from all types to all types. 
+ defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; + defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; + defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; + defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; + defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; + defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; + defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; + defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; + defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; + + // These cvts are different from those above: The source and dest registers + // are of the same type. + def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.s16.s8 \t$dst, $src;", []>; + def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "cvt.s32.s8 \t$dst, $src;", []>; + def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "cvt.s32.s16 \t$dst, $src;", []>; + def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s8 \t$dst, $src;", []>; + def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s16 \t$dst, $src;", []>; + def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s32 \t$dst, $src;", []>; } //----------------------------------- // Integer Arithmetic //----------------------------------- +// Template for xor masquerading as int1 arithmetic. multiclass ADD_SUB_i1 { def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; } +// int1 addition and subtraction are both just xor. 
defm ADD_i1 : ADD_SUB_i1; defm SUB_i1 : ADD_SUB_i1; - +// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we +// also use these for unsigned arithmetic. defm ADD : I3<"add.s", add>; defm SUB : I3<"sub.s", sub>; +// int32 addition and subtraction with carry-out. +// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; +// int32 addition and subtraction with carry-in and carry-out. defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; -//mul.wide PTX instruction +defm MULT : I3<"mul.lo.s", mul>; + +defm MULTHS : I3<"mul.hi.s", mulhs>; +defm MULTHU : I3<"mul.hi.u", mulhu>; + +defm SDIV : I3<"div.s", sdiv>; +defm UDIV : I3<"div.u", udiv>; + +// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM +// will lower it. +defm SREM : I3<"rem.s", srem>; +defm UREM : I3<"rem.u", urem>; + + +// +// Wide multiplication +// +def MULWIDES64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; + +def MULWIDEU64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; + +def MULWIDES32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + 
"mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; + +def MULWIDEU32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; + +def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; +def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; +def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; + +// Matchers for signed, unsigned mul.wide ISD nodes. +def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), + (MULWIDES32Imm Int16Regs:$a, imm:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), + (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, + Requires<[doMulWide]>; + +def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), + (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), + (MULWIDES64Imm Int32Regs:$a, imm:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), + (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, + Requires<[doMulWide]>; + +// Predicates used for converting some patterns to mul.wide. 
def SInt32Const : PatLeaf<(imm), [{ const APInt &v = N->getAPIntValue(); - if (v.isSignedIntN(32)) - return true; - return false; + return v.isSignedIntN(32); }]>; def UInt32Const : PatLeaf<(imm), [{ const APInt &v = N->getAPIntValue(); - if (v.isIntN(32)) - return true; - return false; + return v.isIntN(32); }]>; def SInt16Const : PatLeaf<(imm), [{ const APInt &v = N->getAPIntValue(); - if (v.isSignedIntN(16)) - return true; - return false; + return v.isSignedIntN(16); }]>; def UInt16Const : PatLeaf<(imm), [{ const APInt &v = N->getAPIntValue(); - if (v.isIntN(16)) - return true; - return false; + return v.isIntN(16); }]>; def Int5Const : PatLeaf<(imm), [{ + // Check if 0 <= v < 32; only then will the result of (x << v) be an int32. const APInt &v = N->getAPIntValue(); - // Check if 0 <= v < 32 - // Only then the result from (x << v) will be i32 - if (v.sge(0) && v.slt(32)) - return true; - return false; + return v.sge(0) && v.slt(32); }]>; def Int4Const : PatLeaf<(imm), [{ + // Check if 0 <= v < 16; only then will the result of (x << v) be an int16. 
const APInt &v = N->getAPIntValue(); - // Check if 0 <= v < 16 - // Only then the result from (x << v) will be i16 - if (v.sge(0) && v.slt(16)) - return true; - return false; + return v.sge(0) && v.slt(16); }]>; def SHL2MUL32 : SDNodeXFormgetTargetConstant(temp.shl(v), SDLoc(N), MVT::i16); }]>; -def MULWIDES64 - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - "mul.wide.s32 \t$dst, $a, $b;", []>; -def MULWIDES64Imm - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - "mul.wide.s32 \t$dst, $a, $b;", []>; -def MULWIDES64Imm64 - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), - "mul.wide.s32 \t$dst, $a, $b;", []>; - -def MULWIDEU64 - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - "mul.wide.u32 \t$dst, $a, $b;", []>; -def MULWIDEU64Imm - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - "mul.wide.u32 \t$dst, $a, $b;", []>; -def MULWIDEU64Imm64 - : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), - "mul.wide.u32 \t$dst, $a, $b;", []>; - -def MULWIDES32 - : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.s16 \t$dst, $a, $b;", []>; -def MULWIDES32Imm - : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - "mul.wide.s16 \t$dst, $a, $b;", []>; -def MULWIDES32Imm32 - : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - "mul.wide.s16 \t$dst, $a, $b;", []>; - -def MULWIDEU32 - : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; -def MULWIDEU32Imm - : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; -def MULWIDEU32Imm32 - : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; - +// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide. 
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; +// Convert "sign/zero-extend then multiply" to mul.wide. def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>, - Requires<[doMulWide]>; + Requires<[doMulWide]>; def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>, - Requires<[doMulWide]>; - - -def SDTMulWide - : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; -def mul_wide_signed - : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; -def 
mul_wide_unsigned - : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; - -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), - (MULWIDES32Imm Int16Regs:$a, imm:$b)>, - Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), - (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), - (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, - Requires<[doMulWide]>; - - -def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), - (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), - (MULWIDES64Imm Int32Regs:$a, imm:$b)>, - Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), - (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), - (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, - Requires<[doMulWide]>; -defm MULT : I3<"mul.lo.s", mul>; - -defm MULTHS : I3<"mul.hi.s", mulhs>; -defm MULTHU : I3<"mul.hi.u", mulhu>; - -defm SDIV : I3<"div.s", sdiv>; -defm UDIV : I3<"div.u", udiv>; - -defm SREM : I3<"rem.s", srem>; -// The ri version will not be selected as DAGCombiner::visitSREM will lower it. -defm UREM : I3<"rem.u", urem>; -// The ri version will not be selected as DAGCombiner::visitUREM will lower it. 
- -def SDTIMAD - : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, - SDTCisInt<2>, SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>; -def imad - : SDNode<"NVPTXISD::IMAD", SDTIMAD>; - -def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, - (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; -def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, - (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; -def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, - (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; -def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, i16imm:$b, i16imm:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, - (imad Int16Regs:$a, imm:$b, imm:$c))]>; - -def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, - (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>; -def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, - (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>; -def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, - (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>; -def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, i32imm:$b, i32imm:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, - (imad Int32Regs:$a, imm:$b, imm:$c))]>; - -def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, - (imad 
Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; -def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, - (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; -def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, - (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; -def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, i64imm:$b, i64imm:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, - (imad Int64Regs:$a, imm:$b, imm:$c))]>; - -def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "neg.s16 \t$dst, $src;", - [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; -def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "neg.s32 \t$dst, $src;", - [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; -def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "neg.s64 \t$dst, $src;", - [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; +// +// Integer multiply-add +// +def SDTIMAD : + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, + SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>; + +def MAD16rrr : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; +def MAD16rri : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; +def MAD16rir : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; +def MAD16rii : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, 
i16imm:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, imm:$c))]>; + +def MAD32rrr : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>; +def MAD32rri : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>; +def MAD32rir : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>; +def MAD32rii : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]>; + +def MAD64rrr : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; +def MAD64rri : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; +def MAD64rir : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; +def MAD64rii : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, i64imm:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]>; + +def INEG16 : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "neg.s16 \t$dst, $src;", + [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; +def INEG32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "neg.s32 \t$dst, 
$src;", + [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; +def INEG64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "neg.s64 \t$dst, $src;", + [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; //----------------------------------- // Floating Point Arithmetic @@ -677,17 +690,13 @@ // Constant 1.0f def FloatConst1 : PatLeaf<(fpimm), [{ - if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle) - return false; - float f = (float)N->getValueAPF().convertToFloat(); - return (f==1.0f); + return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle && + N->getValueAPF().convertToFloat() == 1.0f; }]>; -// Constand (double)1.0 +// Constant 1.0 (double) def DoubleConst1 : PatLeaf<(fpimm), [{ - if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble) - return false; - double d = (double)N->getValueAPF().convertToDouble(); - return (d==1.0); + return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble && + N->getValueAPF().convertToDouble() == 1.0; }]>; defm FADD : F3<"add", fadd>; @@ -698,157 +707,157 @@ defm FSUB_rn : F3_rn<"sub", fsub>; defm FMUL_rn : F3_rn<"mul", fmul>; -defm FABS : F2<"abs", fabs>; -defm FNEG : F2<"neg", fneg>; +defm FABS : F2<"abs", fabs>; +defm FNEG : F2<"neg", fneg>; defm FSQRT : F2<"sqrt.rn", fsqrt>; // // F64 division // -def FDIV641r : NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, Float64Regs:$b), - "rcp.rn.f64 \t$dst, $b;", - [(set Float64Regs:$dst, - (fdiv DoubleConst1:$a, Float64Regs:$b))]>; -def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, - (fdiv Float64Regs:$a, Float64Regs:$b))]>; -def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, - (fdiv Float64Regs:$a, fpimm:$b))]>; +def FDIV641r : + NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, Float64Regs:$b), + "rcp.rn.f64 \t$dst, $b;", + [(set 
Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>; +def FDIV64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>; +def FDIV64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>; // // F32 Approximate reciprocal // -def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, - (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX, doF32FTZ]>; -def FDIV321r : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.f32 \t$dst, $b;", - [(set Float32Regs:$dst, - (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX]>; +def FDIV321r_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV321r : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX]>; // // F32 Approximate division // -def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.approx.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX, doF32FTZ]>; -def FDIV32approxri_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.approx.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_APPROX, doF32FTZ]>; -def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, 
Float32Regs:$b), - "div.approx.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX]>; -def FDIV32approxri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.approx.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_APPROX]>; +def FDIV32approxrr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.approx.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV32approxri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.approx.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV32approxrr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.approx.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX]>; +def FDIV32approxri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.approx.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_APPROX]>; // // F32 Semi-accurate reciprocal // // rcp.approx gives the same result as div.full(1.0f, a) and is faster. 
// -def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, - (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL, doF32FTZ]>; -def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.f32 \t$dst, $b;", - [(set Float32Regs:$dst, - (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL]>; +def FDIV321r_approx_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV321r_approx : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL]>; // // F32 Semi-accurate division // -def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.full.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL, doF32FTZ]>; -def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.full.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_FULL, doF32FTZ]>; -def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.full.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL]>; -def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.full.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_FULL]>; +def FDIV32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + 
"div.full.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.full.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.full.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL]>; +def FDIV32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.full.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_FULL]>; // // F32 Accurate reciprocal // -def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.rn.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, - (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[reqPTX20, doF32FTZ]>; -def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.rn.f32 \t$dst, $b;", - [(set Float32Regs:$dst, - (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[reqPTX20]>; +def FDIV321r_prec_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.rn.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[reqPTX20, doF32FTZ]>; +def FDIV321r_prec : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.rn.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; // // F32 Accurate division // -def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ, 
reqPTX20]>; -def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ, reqPTX20]>; -def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[reqPTX20]>; -def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, - (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[reqPTX20]>; +def FDIV32rr_prec_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; +def FDIV32ri_prec_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; +def FDIV32rr_prec : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; +def FDIV32ri_prec : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[reqPTX20]>; // // F32 rsqrt @@ -857,68 +866,39 @@ def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b), "rsqrt.approx.f32 \t$dst, $b;", []>; +// Convert 1.0f/sqrt(x) to rsqrt.approx.f32. (There is an rsqrt.approx.f64, but +// it's emulated in software.) 
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)), (RSQRTF32approx1r Float32Regs:$b)>, Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>; -multiclass FPCONTRACT32 { - def rrr : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, - (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>, - Requires<[Pred]>; - def rri : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, - (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>, - Requires<[Pred]>; - def rir : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, - (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>, - Requires<[Pred]>; - def rii : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b, f32imm:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, - (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>, - Requires<[Pred]>; -} - -multiclass FPCONTRACT64 { - def rrr : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, - (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>, - Requires<[Pred]>; - def rri : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, - (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>, - Requires<[Pred]>; - def rir : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, - (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>, - Requires<[Pred]>; - def rii : NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, 
f64imm:$b, f64imm:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, - (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>, - Requires<[Pred]>; -} - -defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>; -defm FMA32 : FPCONTRACT32<"fma.rn.f32", true>; -defm FMA64 : FPCONTRACT64<"fma.rn.f64", true>; +multiclass FMA { + def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>, + Requires<[Pred]>; + def rri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, ImmCls:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>, + Requires<[Pred]>; + def rir : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, RC:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>, + Requires<[Pred]>; + def rii : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, ImmCls:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>, + Requires<[Pred]>; +} + +defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; +defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>; +defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>; +// sin/cos def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "sin.approx.f32 \t$dst, $src;", [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; @@ -926,8 +906,8 @@ "cos.approx.f32 \t$dst, $src;", [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>; -// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)) -// e.g. "poor man's fmod()" +// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)), +// i.e. 
"poor man's fmod()" // frem - f32 FTZ def : Pat<(frem Float32Regs:$x, Float32Regs:$y), @@ -962,183 +942,152 @@ fpimm:$y))>; //----------------------------------- -// Logical Arithmetic +// Bitwise operations //----------------------------------- -multiclass LOG_FORMAT { - def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), - !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; - def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), - !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; - def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - Int16Regs:$b))]>; - def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; - def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - Int64Regs:$b))]>; - def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; -} - -defm OR : LOG_FORMAT<"or", or>; -defm AND : LOG_FORMAT<"and", and>; -defm XOR : LOG_FORMAT<"xor", xor>; +// Template for three-arg bitwise operations. 
Takes three args. Creates .b16, +// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr. +multiclass BITWISE { + def b1rr : + NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), + !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + def b1ri : + NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), + !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; + def b16rr : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def b16ri : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def b32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def b32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def b64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def b64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; +} + +defm OR : BITWISE<"or", or>; +defm AND : BITWISE<"and", and>; +defm XOR : BITWISE<"xor", xor>; -def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), +def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), "not.pred \t$dst, $src;", [(set Int1Regs:$dst, (not Int1Regs:$src))]>; -def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), 
+def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "not.b16 \t$dst, $src;", [(set Int16Regs:$dst, (not Int16Regs:$src))]>; -def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), +def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), "not.b32 \t$dst, $src;", [(set Int32Regs:$dst, (not Int32Regs:$src))]>; -def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "not.b64 \t$dst, $src;", - [(set Int64Regs:$dst, (not Int64Regs:$src))]>; - -// For shifts, the second src operand must be 32-bit value -multiclass LSHIFT_FORMAT { - def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - Int32Regs:$b))]>; - def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - (i32 imm:$b)))]>; - def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - (i32 imm:$b)))]>; - def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 imm:$a), - (i32 imm:$b)))]>; - def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - Int32Regs:$b))]>; - def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - (i32 imm:$b)))]>; -} - -defm SHL : LSHIFT_FORMAT<"shl.b", shl>; - -// For shifts, the second src operand must 
be 32-bit value -// Need to add cvt for the 8-bits. -multiclass RSHIFT_FORMAT { - def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - Int32Regs:$b))]>; - def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, - (i32 imm:$b)))]>; - def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - Int32Regs:$b))]>; - def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, - (i32 imm:$b)))]>; - def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 imm:$a), - (i32 imm:$b)))]>; - def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, - Int32Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - Int32Regs:$b))]>; - def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, - (i32 imm:$b)))]>; -} - -defm SRA : RSHIFT_FORMAT<"shr.s", sra>; -defm SRL : RSHIFT_FORMAT<"shr.u", srl>; +def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "not.b64 \t$dst, $src;", + [(set Int64Regs:$dst, (not Int64Regs:$src))]>; + +// Template for left/right shifts. Takes three operands, +// [dest (reg), src (reg), shift (reg or imm)]. +// dest and src may be int64, int32, or int16, but shift is always int32. +// +// This template also defines a 32-bit shift (imm, imm) instruction. 
+multiclass SHIFT { + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>; + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>; + def i32ii : + NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>; + def i16rr : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>; + def i16ri : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>; +} + +defm SHL : SHIFT<"shl.b", shl>; +defm SRA : SHIFT<"shr.s", sra>; +defm SRL : SHIFT<"shr.u", srl>; // -// Rotate: use ptx shf instruction if available. +// Rotate: Use ptx shf instruction if available. 
// // 32 bit r2 = rotl r1, n // => // r2 = shf.l r1, r1, n -def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, i32imm:$amt), - "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>, - Requires<[hasHWROT32]> ; - -def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, Int32Regs:$amt), - "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[hasHWROT32]>; +def ROTL32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTL32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]>; // 32 bit r2 = rotr r1, n // => // r2 = shf.r r1, r1, n -def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, i32imm:$amt), - "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>, - Requires<[hasHWROT32]>; - -def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, Int32Regs:$amt), - "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[hasHWROT32]>; - -// -// Rotate: if ptx shf instruction is not available, then use shift+add -// -// 32bit -def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), - !strconcat("{{\n\t", - !strconcat(".reg .b32 %lhs;\n\t", - !strconcat(".reg .b32 %rhs;\n\t", - !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t", - !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t", - !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", - !strconcat("}}", ""))))))), - []>; +def ROTR32imm_hw : + 
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTR32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]>; + +// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1. +def ROT32imm_sw : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + "shl.b32 \t%lhs, $src, $amt1;\n\t" + "shr.b32 \t%rhs, $src, $amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; def SUB_FRM_32 : SDNodeXFormgetTargetConstant(32-N->getZExtValue(), SDLoc(N), MVT::i32); + return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32); }]>; def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), @@ -1148,45 +1097,48 @@ (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, Requires<[noHWROT32]>; -def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, - Int32Regs:$amt), - !strconcat("{{\n\t", - !strconcat(".reg .b32 %lhs;\n\t", - !strconcat(".reg .b32 %rhs;\n\t", - !strconcat(".reg .b32 %amt2;\n\t", - !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", - !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", - !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", - !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", - !strconcat("}}", ""))))))))), - [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[noHWROT32]>; - -def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, - Int32Regs:$amt), - !strconcat("{{\n\t", - !strconcat(".reg .b32 %lhs;\n\t", - !strconcat(".reg .b32 %rhs;\n\t", - !strconcat(".reg .b32 %amt2;\n\t", - !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", - !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", 
- !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", - !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", - !strconcat("}}", ""))))))))), - [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[noHWROT32]>; - -// 64bit -def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, - i32imm:$amt1, i32imm:$amt2), - !strconcat("{{\n\t", - !strconcat(".reg .b64 %lhs;\n\t", - !strconcat(".reg .b64 %rhs;\n\t", - !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t", - !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t", - !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", - !strconcat("}}", ""))))))), - []>; +// 32-bit software rotate left by register. +def ROTL32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shl.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shr.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[noHWROT32]>; + +// 32-bit software rotate right by register. +def ROTR32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shr.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shl.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[noHWROT32]>; + +// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1. 
+def ROT64imm_sw : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + "shl.b64 \t%lhs, $src, $amt1;\n\t" + "shr.b64 \t%rhs, $src, $amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; def SUB_FRM_64 : SDNodeXFormgetTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32); @@ -1197,37 +1149,70 @@ def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; -def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, - Int32Regs:$amt), - !strconcat("{{\n\t", - !strconcat(".reg .b64 %lhs;\n\t", - !strconcat(".reg .b64 %rhs;\n\t", - !strconcat(".reg .u32 %amt2;\n\t", - !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t", - !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", - !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t", - !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", - !strconcat("}}", ""))))))))), - [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; - -def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, - Int32Regs:$amt), - !strconcat("{{\n\t", - !strconcat(".reg .b64 %lhs;\n\t", - !strconcat(".reg .b64 %rhs;\n\t", - !strconcat(".reg .u32 %amt2;\n\t", - !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t", - !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", - !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", - !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", - !strconcat("}}", ""))))))))), - [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; +// 64-bit software rotate left by register. 
+def ROTL64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "shl.b64 \t%lhs, $src, $amt;\n\t" + "sub.u32 \t%amt2, 64, $amt;\n\t" + "shr.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; + +def ROTR64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "shr.b64 \t%lhs, $src, $amt;\n\t" + "sub.u32 \t%amt2, 64, $amt;\n\t" + "shl.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; + +// +// Funnel shift in clamp mode +// + +// Create SDNodes so they can be used in the DAG code, e.g. +// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) +def SDTIntShiftDOp : + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisInt<3>]>; +def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; +def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; + +def FUNSHFLCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>; + +def FUNSHFRCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>; +// // BFE - bit-field extract +// +// Template for BFE instructions. Takes four args, +// [dest (reg), src (reg), start (reg or imm), length (reg or imm)]. +// Start may be an imm only if length is also an imm. FIXME: Is this a
+// restriction in PTX? +// +// dest and src may be int32 or int64, but start and length are always int32. multiclass BFE { - // BFE supports both 32-bit and 64-bit values, but the start and length - // operands are always 32-bit def rrr : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c), @@ -1248,23 +1233,25 @@ defm BFE_U64 : BFE<"u64", Int64Regs>; //----------------------------------- -// General Comparison +// Comparison instructions (setp, set) //----------------------------------- -// General setp instructions +// FIXME: This doesn't cover versions of set and setp that combine with a +// boolean predicate, e.g. setp.eq.and.b16. + multiclass SETP { - def rr : NVPTXInst<(outs Int1Regs:$dst), - (ins RC:$a, RC:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), - []>; - def ri : NVPTXInst<(outs Int1Regs:$dst), - (ins RC:$a, ImmCls:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), - []>; - def ir : NVPTXInst<(outs Int1Regs:$dst), - (ins ImmCls:$a, RC:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, "\t$dst, $a, $b;"), - []>; + def rr : + NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + "\t$dst, $a, $b;"), []>; + def ri : + NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + "\t$dst, $a, $b;"), []>; + def ir : + NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + "\t$dst, $a, $b;"), []>; } defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; @@ -1279,7 +1266,10 @@ defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>; defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; -// General set instructions +// FIXME: This doesn't appear to be correct.
The "set" mnemonic has the form +// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination +// reg, either u32, s32, or f32. Anyway these aren't used at the moment. + multiclass SET { def rr : NVPTXInst<(outs Int32Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), @@ -1305,10 +1295,13 @@ defm SET_f64 : SET<"f64", Float64Regs, f64imm>; //----------------------------------- -// General Selection +// Selection instructions (selp) //----------------------------------- -// General selp instructions +// FIXME: Missing slct + +// selp instructions that don't have any pattern matches; we explicitly use +// them within this file. multiclass SELP { def rr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, Int1Regs:$p), @@ -1326,24 +1319,30 @@ multiclass SELP_PATTERN { - def rr : NVPTXInst<(outs RC:$dst), - (ins RC:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; - def ri : NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; - def ir : NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; - def ii : NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; + def rr : + NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; + def ri : + NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; + def ir : + NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, 
Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; + def ii : + NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; } +// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as +// good. defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>; defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>; defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>; @@ -1356,40 +1355,14 @@ defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; -// -// Funnnel shift in clamp mode -// -// - SDNodes are created so they can be used in the DAG code, -// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) -// -def SDTIntShiftDOp: SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisInt<3>]>; -def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; -def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; - -def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (FUN_SHFL_CLAMP Int32Regs:$lo, - Int32Regs:$hi, Int32Regs:$amt))]>; - -def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (FUN_SHFR_CLAMP Int32Regs:$lo, - Int32Regs:$hi, Int32Regs:$amt))]>; - //----------------------------------- // Data Movement (Load / Store, Move) //----------------------------------- def ADDRri : ComplexPattern; + [SDNPWantRoot]>; def ADDRri64 : ComplexPattern; + [SDNPWantRoot]>; def MEMri : Operand { let PrintMethod = "printMemOperand"; @@ -1401,82 +1374,81 @@ } def imem : 
Operand { - let PrintMethod = "printOperand"; + let PrintMethod = "printOperand"; } def imemAny : Operand { - let PrintMethod = "printOperand"; + let PrintMethod = "printOperand"; } def LdStCode : Operand { - let PrintMethod = "printLdStCode"; + let PrintMethod = "printLdStCode"; } def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; +// Load a memory address into a u32 or u64 register. def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), - "mov.u32 \t$dst, $a;", - [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; - + "mov.u32 \t$dst, $a;", + [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), - "mov.u64 \t$dst, $a;", - [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; + "mov.u64 \t$dst, $a;", + [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; -// Get pointer to local stack -def MOV_DEPOT_ADDR - : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), - "mov.u32 \t$d, __local_depot$num;", []>; -def MOV_DEPOT_ADDR_64 - : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), - "mov.u64 \t$d, __local_depot$num;", []>; +// Get pointer to local stack. 
+def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), + "mov.u32 \t$d, __local_depot$num;", []>; +def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), + "mov.u64 \t$d, __local_depot$num;", []>; // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp let IsSimpleMove=1 in { -def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), - "mov.pred \t$dst, $sss;", []>; -def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), - "mov.u16 \t$dst, $sss;", []>; -def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), - "mov.u32 \t$dst, $sss;", []>; -def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), - "mov.u64 \t$dst, $sss;", []>; - -def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), - "mov.f32 \t$dst, $src;", []>; -def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), - "mov.f64 \t$dst, $src;", []>; -} -def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), - "mov.pred \t$dst, $src;", - [(set Int1Regs:$dst, imm:$src)]>; -def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), - "mov.u16 \t$dst, $src;", - [(set Int16Regs:$dst, imm:$src)]>; -def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), - "mov.u32 \t$dst, $src;", - [(set Int32Regs:$dst, imm:$src)]>; -def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), - "mov.u64 \t$dst, $src;", - [(set Int64Regs:$dst, imm:$src)]>; - -def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), - "mov.f32 \t$dst, $src;", - [(set Float32Regs:$dst, fpimm:$src)]>; -def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), - "mov.f64 \t$dst, $src;", - [(set Float64Regs:$dst, fpimm:$src)]>; + def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), + "mov.pred \t$dst, $sss;", []>; + def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), + "mov.u16 \t$dst, $sss;", []>; + def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins 
Int32Regs:$sss), + "mov.u32 \t$dst, $sss;", []>; + def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), + "mov.u64 \t$dst, $sss;", []>; + + def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "mov.f32 \t$dst, $src;", []>; + def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), + "mov.f64 \t$dst, $src;", []>; +} + +def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), + "mov.pred \t$dst, $src;", + [(set Int1Regs:$dst, imm:$src)]>; +def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), + "mov.u16 \t$dst, $src;", + [(set Int16Regs:$dst, imm:$src)]>; +def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), + "mov.u32 \t$dst, $src;", + [(set Int32Regs:$dst, imm:$src)]>; +def IMOV64i : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), + "mov.u64 \t$dst, $src;", + [(set Int64Regs:$dst, imm:$src)]>; + +def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), + "mov.f32 \t$dst, $src;", + [(set Float32Regs:$dst, fpimm:$src)]>; +def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), + "mov.f64 \t$dst, $src;", + [(set Float64Regs:$dst, fpimm:$src)]>; def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; //---- Copy Frame Index ---- -def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), - "add.u32 \t$dst, ${addr:add};", - [(set Int32Regs:$dst, ADDRri:$addr)]>; +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), + "add.u32 \t$dst, ${addr:add};", + [(set Int32Regs:$dst, ADDRri:$addr)]>; def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), - "add.u64 \t$dst, ${addr:add};", - [(set Int64Regs:$dst, ADDRri64:$addr)]>; + "add.u64 \t$dst, ${addr:add};", + [(set Int64Regs:$dst, ADDRri64:$addr)]>; //----------------------------------- // Comparison and Selection @@ -1554,7 +1526,7 @@ SET_s16rr, SET_s16ri, SET_s16ir, SET_s32rr, SET_s32ri, SET_s32ir, SET_s64rr, SET_s64ri, SET_s64ir> { - // TableGen 
doesn't like empty multiclasses + // TableGen doesn't like empty multiclasses. def : PatLeaf<(i32 0)>; } @@ -1566,21 +1538,21 @@ SET_u16rr, SET_u16ri, SET_u16ir, SET_u32rr, SET_u32ri, SET_u32ir, SET_u64rr, SET_u64ri, SET_u64ir> { - // TableGen doesn't like empty multiclasses + // TableGen doesn't like empty multiclasses. def : PatLeaf<(i32 0)>; } defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; defm : ISET_FORMAT_UNSIGNED; // i1 compares @@ -1678,13 +1650,14 @@ defm FSetNUM : FSET_FORMAT; defm FSetNAN : FSET_FORMAT; -//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, -// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; - -def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, - SDTCisInt<2>]>; -def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, - SDTCisInt<1>, SDTCisInt<2>]>; +// FIXME: What is this doing here? Can it be deleted? 
+// def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, +// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def SDTDeclareParamProfile : + SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; +def SDTDeclareScalarParamProfile : + SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>; def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>; @@ -1704,187 +1677,198 @@ def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; -def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", - SDTDeclareScalarParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam", - SDTDeclareParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, - [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def LoadParamV2 : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile, - [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def LoadParamV4 : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, - [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParam : SDNode<"NVPTXISD::StoreParam", 
SDTStoreParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, - []>; -def StoreRetval : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; -def StoreRetvalV2 : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, - [SDNPHasChain, SDNPSideEffect]>; -def StoreRetvalV4 : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, - [SDNPHasChain, SDNPSideEffect]>; -def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", - 
SDTPseudoUseParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, - [SDNPHasChain, SDNPSideEffect]>; +def DeclareParam : + SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareScalarParam : + SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRetParam : + SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRet : + SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LoadParam : + SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def LoadParamV2 : + SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def LoadParamV4 : + SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def PrintCall : + SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def PrintCallUni : + SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParam : + SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamV2 : + SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamV4 : + SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamU32 : + SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def 
StoreParamS32 : + SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgBegin : + SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArg : + SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LastCallArg : + SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgEnd : + SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVoid : + SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def Prototype : + SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVal : + SDNode<"NVPTXISD::CallVal", SDTCallValProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def MoveParam : + SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; +def StoreRetval : + SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetvalV2 : + SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetvalV4 : + SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, + [SDNPHasChain, SDNPSideEffect]>; +def PseudoUseParam : + SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def RETURNNode : + SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPSideEffect]>; class LoadParamMemInst : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), !strconcat(!strconcat("ld.param", opstr), - "\t$dst, [retval0+$b];"), + "\t$dst, [retval0+$b];"), []>; class LoadParamRegInst : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), - !strconcat(!strconcat("mov", 
opstr), - "\t$dst, retval$b;"), + !strconcat("mov", opstr, "\t$dst, retval$b;"), [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; class LoadParamV2MemInst : NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b), - !strconcat(!strconcat("ld.param.v2", opstr), - "\t{{$dst, $dst2}}, [retval0+$b];"), []>; + !strconcat("ld.param.v2", opstr, + "\t{{$dst, $dst2}}, [retval0+$b];"), []>; class LoadParamV4MemInst : NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins i32imm:$b), - !strconcat(!strconcat("ld.param.v4", opstr), - "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>; + !strconcat("ld.param.v4", opstr, + "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), + []>; class StoreParamInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), - !strconcat(!strconcat("st.param", opstr), - "\t[param$a+$b], $val;"), + !strconcat("st.param", opstr, "\t[param$a+$b], $val;"), []>; class StoreParamV2Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b), - !strconcat(!strconcat("st.param.v2", opstr), - "\t[param$a+$b], {{$val, $val2}};"), + !strconcat("st.param.v2", opstr, + "\t[param$a+$b], {{$val, $val2}};"), []>; class StoreParamV4Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3, regclass:$val4, i32imm:$a, i32imm:$b), - !strconcat(!strconcat("st.param.v4", opstr), - "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), + !strconcat("st.param.v4", opstr, + "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"), []>; class StoreRetvalInst : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), - !strconcat(!strconcat("st.param", opstr), - "\t[func_retval0+$a], $val;"), + !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"), []>; class StoreRetvalV2Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a), - !strconcat(!strconcat("st.param.v2", opstr), - "\t[func_retval0+$a], {{$val, $val2}};"), + !strconcat("st.param.v2", opstr, + "\t[func_retval0+$a],
{{$val, $val2}};"), []>; class StoreRetvalV4Inst : NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3, regclass:$val4, i32imm:$a), - !strconcat(!strconcat("st.param.v4", opstr), - "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), + !strconcat("st.param.v4", opstr, + "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), []>; -let isCall = 1 in { -def PrintCallRetInst1 : NVPTXInst<(outs), (ins), -"call (retval0), ", - [(PrintCall (i32 1))]>; -def PrintCallRetInst2 : NVPTXInst<(outs), (ins), -"call (retval0, retval1), ", - [(PrintCall (i32 2))]>; -def PrintCallRetInst3 : NVPTXInst<(outs), (ins), -"call (retval0, retval1, retval2), ", - [(PrintCall (i32 3))]>; -def PrintCallRetInst4 : NVPTXInst<(outs), (ins), -"call (retval0, retval1, retval2, retval3), ", - [(PrintCall (i32 4))]>; -def PrintCallRetInst5 : NVPTXInst<(outs), (ins), -"call (retval0, retval1, retval2, retval3, retval4), ", - [(PrintCall (i32 5))]>; -def PrintCallRetInst6 : NVPTXInst<(outs), (ins), -"call (retval0, retval1, retval2, retval3, retval4, retval5), ", - [(PrintCall (i32 6))]>; -def PrintCallRetInst7 : NVPTXInst<(outs), (ins), -"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", - [(PrintCall (i32 7))]>; -def PrintCallRetInst8 : NVPTXInst<(outs), (ins), -!strconcat("call (retval0, retval1, retval2, retval3, retval4", - ", retval5, retval6, retval7), "), - [(PrintCall (i32 8))]>; - -def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ", - [(PrintCall (i32 0))]>; - -def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins), -"call.uni (retval0), ", - [(PrintCallUni (i32 1))]>; -def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins), -"call.uni (retval0, retval1), ", - [(PrintCallUni (i32 2))]>; -def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins), -"call.uni (retval0, retval1, retval2), ", - [(PrintCallUni (i32 3))]>; -def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins), -"call.uni (retval0, retval1, retval2, retval3), ", - [(PrintCallUni (i32 4))]>; -def 
PrintCallUniRetInst5 : NVPTXInst<(outs), (ins), -"call.uni (retval0, retval1, retval2, retval3, retval4), ", - [(PrintCallUni (i32 5))]>; -def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins), -"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ", - [(PrintCallUni (i32 6))]>; -def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins), -"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", - [(PrintCallUni (i32 7))]>; -def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins), -!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4", - ", retval5, retval6, retval7), "), - [(PrintCallUni (i32 8))]>; - -def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ", - [(PrintCallUni (i32 0))]>; -} // call instructions +let isCall=1 in { + def PrintCallNoRetInst : NVPTXInst<(outs), (ins), + "call ", [(PrintCall (i32 0))]>; + def PrintCallRetInst1 : NVPTXInst<(outs), (ins), + "call (retval0), ", [(PrintCall (i32 1))]>; + def PrintCallRetInst2 : NVPTXInst<(outs), (ins), + "call (retval0, retval1), ", [(PrintCall (i32 2))]>; + def PrintCallRetInst3 : NVPTXInst<(outs), (ins), + "call (retval0, retval1, retval2), ", [(PrintCall (i32 3))]>; + def PrintCallRetInst4 : NVPTXInst<(outs), (ins), + "call (retval0, retval1, retval2, retval3), ", [(PrintCall (i32 4))]>; + def PrintCallRetInst5 : NVPTXInst<(outs), (ins), + "call (retval0, retval1, retval2, retval3, retval4), ", + [(PrintCall (i32 5))]>; + def PrintCallRetInst6 : NVPTXInst<(outs), (ins), + "call (retval0, retval1, retval2, retval3, retval4, retval5), ", + [(PrintCall (i32 6))]>; + def PrintCallRetInst7 : NVPTXInst<(outs), (ins), + "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", + [(PrintCall (i32 7))]>; + def PrintCallRetInst8 : NVPTXInst<(outs), (ins), + "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6, " + "retval7), ", + [(PrintCall (i32 8))]>; + + def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), + "call.uni ", [(PrintCallUni (i32 
0))]>; + def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins), + "call.uni (retval0), ", [(PrintCallUni (i32 1))]>; + def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1), ", [(PrintCallUni (i32 2))]>; + def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1, retval2), ", [(PrintCallUni (i32 3))]>; + def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1, retval2, retval3), ", [(PrintCallUni (i32 4))]>; + def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1, retval2, retval3, retval4), ", + [(PrintCallUni (i32 5))]>; + def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ", + [(PrintCallUni (i32 6))]>; + def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", + [(PrintCallUni (i32 7))]>; + def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins), + "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6, " + "retval7), ", + [(PrintCallUni (i32 8))]>; +} def LoadParamMemI64 : LoadParamMemInst; def LoadParamMemI32 : LoadParamMemInst; @@ -1915,37 +1899,35 @@ // FIXME: StoreParamV4Inst crashes llvm-tblgen :( //def StoreParamV4I32 : StoreParamV4Inst; -def StoreParamV4I32 : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, - Int32Regs:$val3, Int32Regs:$val4, - i32imm:$a, i32imm:$b), - "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - []>; - -def StoreParamV4I16 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, - Int16Regs:$val3, Int16Regs:$val4, - i32imm:$a, i32imm:$b), - "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - []>; - -def StoreParamV4I8 : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, - Int16Regs:$val3, Int16Regs:$val4, - i32imm:$a, i32imm:$b), - "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - []>; +def StoreParamV4I32 : + 
NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, Int32Regs:$val3, + Int32Regs:$val4, i32imm:$a, i32imm:$b), + "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + []>; + +def StoreParamV4I16 : + NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3, + Int16Regs:$val4, i32imm:$a, i32imm:$b), + "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + []>; + +def StoreParamV4I8 : + NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3, + Int16Regs:$val4, i32imm:$a, i32imm:$b), + "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + []>; -def StoreParamF32 : StoreParamInst; -def StoreParamF64 : StoreParamInst; +def StoreParamF32 : StoreParamInst; +def StoreParamF64 : StoreParamInst; def StoreParamV2F32 : StoreParamV2Inst; def StoreParamV2F64 : StoreParamV2Inst; // FIXME: StoreParamV4Inst crashes llvm-tblgen :( //def StoreParamV4F32 : StoreParamV4Inst; -def StoreParamV4F32 : NVPTXInst<(outs), - (ins Float32Regs:$val, Float32Regs:$val2, - Float32Regs:$val3, Float32Regs:$val4, - i32imm:$a, i32imm:$b), - "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", - []>; - +def StoreParamV4F32 : + NVPTXInst<(outs), (ins Float32Regs:$val, Float32Regs:$val2, Float32Regs:$val3, + Float32Regs:$val4, i32imm:$a, i32imm:$b), + "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};", + []>; def StoreRetvalI64 : StoreRetvalInst; def StoreRetvalI32 : StoreRetvalInst; @@ -1971,89 +1953,88 @@ def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; class CallArgInst : - NVPTXInst<(outs), (ins regclass:$a), "$a, ", - [(CallArg (i32 0), regclass:$a)]>; + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), regclass:$a)]>; class LastCallArgInst : - NVPTXInst<(outs), (ins regclass:$a), "$a", - [(LastCallArg (i32 0), regclass:$a)]>; + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), regclass:$a)]>; def CallArgI64 : CallArgInst; def CallArgI32 : CallArgInst; 
def CallArgI16 : CallArgInst; - def CallArgF64 : CallArgInst; def CallArgF32 : CallArgInst; def LastCallArgI64 : LastCallArgInst; def LastCallArgI32 : LastCallArgInst; def LastCallArgI16 : LastCallArgInst; - def LastCallArgF64 : LastCallArgInst; def LastCallArgF32 : LastCallArgInst; def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", [(CallArg (i32 0), (i32 imm:$a))]>; def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", - [(LastCallArg (i32 0), (i32 imm:$a))]>; + [(LastCallArg (i32 0), (i32 imm:$a))]>; def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", [(CallArg (i32 1), (i32 imm:$a))]>; def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", - [(LastCallArg (i32 1), (i32 imm:$a))]>; + [(LastCallArg (i32 1), (i32 imm:$a))]>; -def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), - "$addr, ", - [(CallVoid (Wrapper tglobaladdr:$addr))]>; -def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), - "$addr, ", - [(CallVoid Int32Regs:$addr)]>; -def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), - "$addr, ", - [(CallVoid Int64Regs:$addr)]>; -def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), - ", prototype_$val;", - [(Prototype (i32 imm:$val))]>; - -def DeclareRetMemInst : NVPTXInst<(outs), - (ins i32imm:$align, i32imm:$size, i32imm:$num), - ".param .align $align .b8 retval$num[$size];", - [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; -def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), - ".param .b$size retval$num;", - [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; -def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), - ".reg .b$size retval$num;", - [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; - -def DeclareParamInst : NVPTXInst<(outs), - (ins i32imm:$align, i32imm:$a, i32imm:$size), - ".param .align $align .b8 param$a[$size];", - [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; 
-def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), - ".param .b$size param$a;", - [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; -def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), - ".reg .b$size param$a;", - [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; +def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", + [(CallVoid (Wrapper tglobaladdr:$addr))]>; +def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", + [(CallVoid Int32Regs:$addr)]>; +def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", + [(CallVoid Int64Regs:$addr)]>; +def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", + [(Prototype (i32 imm:$val))]>; + +def DeclareRetMemInst : + NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num), + ".param .align $align .b8 retval$num[$size];", + [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetScalarInst : + NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".param .b$size retval$num;", + [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetRegInst : + NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".reg .b$size retval$num;", + [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; + +def DeclareParamInst : + NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size), + ".param .align $align .b8 param$a[$size];", + [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; +def DeclareScalarParamInst : + NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".param .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; +def DeclareScalarRegInst : + NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".reg .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; class MoveParamInst : - NVPTXInst<(outs regclass:$dst), (ins regclass:$src), - 
!strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"), - [(set regclass:$dst, (MoveParam regclass:$src))]>; + NVPTXInst<(outs regclass:$dst), (ins regclass:$src), + !strconcat("mov", asmstr, "\t$dst, $src;"), + [(set regclass:$dst, (MoveParam regclass:$src))]>; def MoveParamI64 : MoveParamInst; def MoveParamI32 : MoveParamInst; -def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.u16.u32\t$dst, $src;", - [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; +def MoveParamI16 : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.u16.u32\t$dst, $src;", + [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; def MoveParamF64 : MoveParamInst; def MoveParamF32 : MoveParamInst; class PseudoUseParamInst : - NVPTXInst<(outs), (ins regclass:$src), - "// Pseudo use of $src", - [(PseudoUseParam regclass:$src)]>; + NVPTXInst<(outs), (ins regclass:$src), + "// Pseudo use of $src", + [(PseudoUseParam regclass:$src)]>; def PseudoUseParamI64 : PseudoUseParamInst; def PseudoUseParamI32 : PseudoUseParamInst; @@ -2066,254 +2047,278 @@ // Load / Store Handling // multiclass LD { - def _avar : NVPTXInst<(outs regclass:$dst), + def _avar : NVPTXInst< + (outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), -!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t$dst, [$addr];"), []>; - def _areg : NVPTXInst<(outs regclass:$dst), + i32imm:$fromWidth, imem:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr];", []>; + def _areg : NVPTXInst< + (outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), -!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t$dst, [$addr];"), []>; - def _areg_64 : NVPTXInst<(outs regclass:$dst), + i32imm:$fromWidth, Int32Regs:$addr), + 
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr];", []>; + def _areg_64 : NVPTXInst< + (outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", - " \t$dst, [$addr];"), []>; - def _ari : NVPTXInst<(outs regclass:$dst), + i32imm:$fromWidth, Int64Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr];", []>; + def _ari : NVPTXInst< + (outs regclass:$dst), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), -!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t$dst, [$addr+$offset];"), []>; - def _ari_64 : NVPTXInst<(outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth", - " \t$dst, [$addr+$offset];"), []>; - def _asi : NVPTXInst<(outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), -!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t$dst, [$addr+$offset];"), []>; + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr+$offset];", []>; + def _ari_64 : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr+$offset];", []>; + def _asi : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, 
i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr+$offset];", []>; } let mayLoad=1, hasSideEffects=0 in { -defm LD_i8 : LD; -defm LD_i16 : LD; -defm LD_i32 : LD; -defm LD_i64 : LD; -defm LD_f32 : LD; -defm LD_f64 : LD; + defm LD_i8 : LD; + defm LD_i16 : LD; + defm LD_i32 : LD; + defm LD_i64 : LD; + defm LD_f32 : LD; + defm LD_f64 : LD; } multiclass ST { - def _avar : NVPTXInst<(outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, imem:$addr), -!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", - " \t[$addr], $src;"), []>; - def _areg : NVPTXInst<(outs), + def _avar : NVPTXInst< + (outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), -!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", - " \t[$addr], $src;"), []>; - def _areg_64 : NVPTXInst<(outs), + LdStCode:$Sign, i32imm:$toWidth, imem:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr], $src;", []>; + def _areg : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr], $src;", []>; + def _areg_64 : NVPTXInst< + (outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", - "\t[$addr], $src;"), []>; - def _ari : NVPTXInst<(outs), + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr], $src;", []>; + def _ari : NVPTXInst< + (outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - 
LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), -!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", - " \t[$addr+$offset], $src;"), []>; - def _ari_64 : NVPTXInst<(outs), + LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr+$offset], $src;", []>; + def _ari_64 : NVPTXInst< + (outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ", - "\t[$addr+$offset], $src;"), []>; - def _asi : NVPTXInst<(outs), + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr+$offset], $src;", []>; + def _asi : NVPTXInst< + (outs), (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), -!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", - " \t[$addr+$offset], $src;"), []>; + LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr+$offset], $src;", []>; } let mayStore=1, hasSideEffects=0 in { -defm ST_i8 : ST; -defm ST_i16 : ST; -defm ST_i32 : ST; -defm ST_i64 : ST; -defm ST_f32 : ST; -defm ST_f64 : ST; + defm ST_i8 : ST; + defm ST_i16 : ST; + defm ST_i32 : ST; + defm ST_i64 : ST; + defm ST_f32 : ST; + defm ST_f64 : ST; } -// The following is used only in and after vector elementizations. -// Vector elementization happens at the machine instruction level, so the -// following instruction -// never appears in the DAG. +// The following is used only in and after vector elementizations. 
Vector +// elementization happens at the machine instruction level, so the following +// instructions never appear in the DAG. multiclass LD_VEC { - def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + def _v2_avar : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; - def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + i32imm:$fromWidth, imem:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr];", []>; + def _v2_areg : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; - def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + i32imm:$fromWidth, Int32Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr];", []>; + def _v2_areg_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; - def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + i32imm:$fromWidth, Int64Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr];", []>; + def _v2_ari : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - 
!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; - def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + def _v2_ari_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; - def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + def _v2_asi : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; - def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + def _v4_avar : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; - def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), + i32imm:$fromWidth, 
imem:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + def _v4_areg : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; - def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), + i32imm:$fromWidth, Int32Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + def _v4_areg_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; - def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), + i32imm:$fromWidth, Int64Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + def _v4_ari : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), - []>; - def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, - regclass:$dst3, regclass:$dst4), + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, 
$dst4}}, [$addr+$offset];", []>; + def _v4_ari_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), - []>; - def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, - regclass:$dst4), + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; + def _v4_asi : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), - !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), - []>; + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; } let mayLoad=1, hasSideEffects=0 in { -defm LDV_i8 : LD_VEC; -defm LDV_i16 : LD_VEC; -defm LDV_i32 : LD_VEC; -defm LDV_i64 : LD_VEC; -defm LDV_f32 : LD_VEC; -defm LDV_f64 : LD_VEC; + defm LDV_i8 : LD_VEC; + defm LDV_i16 : LD_VEC; + defm LDV_i32 : LD_VEC; + defm LDV_i64 : LD_VEC; + defm LDV_f32 : LD_VEC; + defm LDV_f64 : LD_VEC; } multiclass ST_VEC { - def _v2_avar : NVPTXInst<(outs), + def _v2_avar : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; - def _v2_areg : NVPTXInst<(outs), + LdStCode:$Vec, 
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2}};", []>; + def _v2_areg : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; - def _v2_areg_64 : NVPTXInst<(outs), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2}};", []>; + def _v2_areg_64 : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; - def _v2_ari : NVPTXInst<(outs), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2}};", []>; + def _v2_ari : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, - i32imm:$offset), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; - def _v2_ari_64 : NVPTXInst<(outs), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, + i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2}};", []>; + def _v2_ari_64 : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, - i32imm:$offset), - 
!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; - def _v2_asi : NVPTXInst<(outs), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, + i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2}};", []>; + def _v2_asi : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, - i32imm:$offset), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; - def _v4_avar : NVPTXInst<(outs), + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, + i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2}};", []>; + def _v4_avar : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; - def _v4_areg : NVPTXInst<(outs), + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_areg : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; - def _v4_areg_64 : NVPTXInst<(outs), + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + 
i32imm:$fromWidth, Int32Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_areg_64 : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; - def _v4_ari : NVPTXInst<(outs), + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_ari : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), - []>; - def _v4_ari_64 : NVPTXInst<(outs), + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_ari_64 : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), - []>; - def _v4_asi : NVPTXInst<(outs), + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + 
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_asi : NVPTXInst< + (outs), (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), - !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", - "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), - []>; + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}" + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; } + let mayStore=1, hasSideEffects=0 in { -defm STV_i8 : ST_VEC; -defm STV_i16 : ST_VEC; -defm STV_i32 : ST_VEC; -defm STV_i64 : ST_VEC; -defm STV_f32 : ST_VEC; -defm STV_f64 : ST_VEC; + defm STV_i8 : ST_VEC; + defm STV_i16 : ST_VEC; + defm STV_i32 : ST_VEC; + defm STV_i64 : ST_VEC; + defm STV_f32 : ST_VEC; + defm STV_f64 : ST_VEC; } @@ -2529,60 +2534,47 @@ // pack a set of smaller int registers to a larger int register def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2, - Int16Regs:$s3, Int16Regs:$s4), - "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", - []>; + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>; def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2), - "mov.b32\t$d, {{$s1, $s2}};", - []>; + (ins Int16Regs:$s1, Int16Regs:$s2), + "mov.b32\t$d, {{$s1, $s2}};", []>; def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int32Regs:$s1, Int32Regs:$s2), - "mov.b64\t$d, {{$s1, $s2}};", - []>; + (ins Int32Regs:$s1, Int32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", []>; def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), - (ins Float32Regs:$s1, Float32Regs:$s2), - "mov.b64\t$d, {{$s1, $s2}};", - []>; + (ins 
Float32Regs:$s1, Float32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", []>; // unpack a larger int register to a set of smaller int registers def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4), (ins Int64Regs:$s), - "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", - []>; + "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>; def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), (ins Int32Regs:$s), - "mov.b32\t{{$d1, $d2}}, $s;", - []>; + "mov.b32\t{{$d1, $d2}}, $s;", []>; def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), (ins Int64Regs:$s), - "mov.b64\t{{$d1, $d2}}, $s;", - []>; + "mov.b64\t{{$d1, $d2}}, $s;", []>; def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), (ins Float64Regs:$s), - "mov.b64\t{{$d1, $d2}}, $s;", - []>; + "mov.b64\t{{$d1, $d2}}, $s;", []>; // Count leading zeros def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), - "clz.b32\t$d, $a;", - []>; + "clz.b32\t$d, $a;", []>; def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "clz.b64\t$d, $a;", - []>; + "clz.b64\t$d, $a;", []>; // 32-bit has a direct PTX instruction -def : Pat<(ctlz Int32Regs:$a), - (CLZr32 Int32Regs:$a)>; -def : Pat<(ctlz_zero_undef Int32Regs:$a), - (CLZr32 Int32Regs:$a)>; +def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; +def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>; // For 64-bit, the result in PTX is actually 32-bit so we zero-extend // to 64-bit to match the LLVM semantics -def : Pat<(ctlz Int64Regs:$a), - (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; def : Pat<(ctlz_zero_undef Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; @@ -2601,27 +2593,22 @@ // Population count def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), - "popc.b32\t$d, $a;", - []>; + "popc.b32\t$d, $a;", []>; def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "popc.b64\t$d, 
$a;", - []>; + "popc.b64\t$d, $a;", []>; // 32-bit has a direct PTX instruction -def : Pat<(ctpop Int32Regs:$a), - (POPCr32 Int32Regs:$a)>; +def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>; // For 64-bit, the result in PTX is actually 32-bit so we zero-extend // to 64-bit to match the LLVM semantics -def : Pat<(ctpop Int64Regs:$a), - (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; // For 16-bit, we zero-extend to 32-bit, then trunc the result back // to 16-bits (ctpop of a 16-bit value is guaranteed to require less // than 16 bits to store) def : Pat<(ctpop Int16Regs:$a), - (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), - CvtNONE)>; + (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; // fround f64 -> f32 def : Pat<(f32 (fround Float64Regs:$a)), @@ -2635,8 +2622,8 @@ def : Pat<(f64 (fextend Float32Regs:$a)), (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; -def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; +def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; //----------------------------------- // Control-flow @@ -2648,56 +2635,48 @@ let isBranch=1 in def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), - "@$a bra \t$target;", - [(brcond Int1Regs:$a, bb:$target)]>; + "@$a bra \t$target;", + [(brcond Int1Regs:$a, bb:$target)]>; let isBranch=1 in def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), - "@!$a bra \t$target;", - []>; + "@!$a bra \t$target;", []>; let isBranch=1, isBarrier=1 in def GOTO : NVPTXInst<(outs), (ins brtarget:$target), - "bra.uni \t$target;", - [(br bb:$target)]>; + "bra.uni \t$target;", [(br bb:$target)]>; } def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a -// conditional branch if -// the target 
block is the next block so that the code can fall through to the -// target block. -// The invertion is done by 'xor condition, 1', which will be translated to -// (setne condition, -1). -// Since ptx supports '@!pred bra target', we should use it. +// conditional branch if the target block is the next block so that the code +// can fall through to the target block. The invertion is done by 'xor +// condition, 1', which will be translated to (setne condition, -1). Since ptx +// supports '@!pred bra target', we should use it. def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), - (CBranchOther Int1Regs:$a, bb:$target)>; + (CBranchOther Int1Regs:$a, bb:$target)>; // Call -def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; -def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; +def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPSideEffect]>; + SDNPSideEffect]>; def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def calltarget : Operand; let isCall=1 in { - def CALL : NVPTXInst<(outs), (ins calltarget:$dst), - "call \t$dst, (1);", []>; + def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>; } -def : Pat<(call tglobaladdr:$dst), - (CALL tglobaladdr:$dst)>; -def : Pat<(call texternalsym:$dst), - (CALL texternalsym:$dst)>; +def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; +def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>; // Pseudo instructions. 
class Pseudo pattern> @@ -2705,31 +2684,34 @@ // @TODO: We use some tricks here to emit curly braces. Can we clean this up // a bit without TableGen modifications? -def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt), - "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// }}", - [(callseq_start timm:$amt)]>; -def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "\n\t//{{\n\t}}// Callseq End $amt1", - [(callseq_end timm:$amt1, timm:$amt2)]>; +def Callseq_Start : + NVPTXInst<(outs), (ins i32imm:$amt), + "// Callseq Start $amt\n" + "\t{{\n" + "\t.reg .b32 temp_param_reg;\n" + "\t// }}", + [(callseq_start timm:$amt)]>; +def Callseq_End : + NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "\n" + "\t//{{\n" + "\t}}// Callseq End $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; // trap instruction - -def trapinst : NVPTXInst<(outs), (ins), - "trap;", - [(trap)]>; +def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>; // Call prototype wrapper def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def CallPrototype - : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallPrototype : + SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def ProtoIdent : Operand { let PrintMethod = "printProtoIdent"; } -def CALL_PROTOTYPE - : NVPTXInst<(outs), (ins ProtoIdent:$ident), - "$ident", [(CallPrototype (i32 texternalsym:$ident))]>; - +def CALL_PROTOTYPE : + NVPTXInst<(outs), (ins ProtoIdent:$ident), + "$ident", [(CallPrototype (i32 texternalsym:$ident))]>; include "NVPTXIntrinsics.td"