namespace llvm {
namespace exegesis {

- namespace {
-
// Returns an error if we cannot handle the memory references in this
// instruction.
- Error isInvalidMemoryInstr(const Instruction &Instr) {
+ static Error isInvalidMemoryInstr(const Instruction &Instr) {
  switch (Instr.Description->TSFlags & X86II::FormMask) {
  default:
    llvm_unreachable("Unknown FormMask value");
@@ -169,78 +167,90 @@ static llvm::Error IsInvalidOpcode(const Instruction &Instr) {
  return llvm::Error::success();
}

- static unsigned GetX86FPFlags(const Instruction &Instr) {
+ static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
}

+ namespace {
class X86LatencySnippetGenerator : public LatencySnippetGenerator {
public:
  using LatencySnippetGenerator::LatencySnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
-   generateCodeTemplates(const Instruction &Instr) const override {
-     if (auto E = IsInvalidOpcode(Instr))
-       return std::move(E);
-
-     switch (GetX86FPFlags(Instr)) {
-     case llvm::X86II::NotFP:
-       return LatencySnippetGenerator::generateCodeTemplates(Instr);
-     case llvm::X86II::ZeroArgFP:
-     case llvm::X86II::OneArgFP:
-     case llvm::X86II::SpecialFP:
-     case llvm::X86II::CompareFP:
-     case llvm::X86II::CondMovFP:
-       return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
-     case llvm::X86II::OneArgFPRW:
-     case llvm::X86II::TwoArgFP:
-       // These are instructions like
-       //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
-       //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
-       // They are intrinsically serial and do not modify the state of the stack.
-       return generateSelfAliasingCodeTemplates(Instr);
-     default:
-       llvm_unreachable("Unknown FP Type!");
-     }
-   }
+   generateCodeTemplates(const Instruction &Instr) const override;
};
+ } // namespace

+ llvm::Expected<std::vector<CodeTemplate>>
+ X86LatencySnippetGenerator::generateCodeTemplates(
+     const Instruction &Instr) const {
+   if (auto E = IsInvalidOpcode(Instr))
+     return std::move(E);
+
+   switch (getX86FPFlags(Instr)) {
+   case llvm::X86II::NotFP:
+     return LatencySnippetGenerator::generateCodeTemplates(Instr);
+   case llvm::X86II::ZeroArgFP:
+   case llvm::X86II::OneArgFP:
+   case llvm::X86II::SpecialFP:
+   case llvm::X86II::CompareFP:
+   case llvm::X86II::CondMovFP:
+     return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
+   case llvm::X86II::OneArgFPRW:
+   case llvm::X86II::TwoArgFP:
+     // These are instructions like
+     //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
+     //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
+     // They are intrinsically serial and do not modify the state of the stack.
+     return generateSelfAliasingCodeTemplates(Instr);
+   default:
+     llvm_unreachable("Unknown FP Type!");
+   }
+ }
+
+ namespace {
class X86UopsSnippetGenerator : public UopsSnippetGenerator {
public:
  using UopsSnippetGenerator::UopsSnippetGenerator;

  llvm::Expected<std::vector<CodeTemplate>>
-   generateCodeTemplates(const Instruction &Instr) const override {
-     if (auto E = IsInvalidOpcode(Instr))
-       return std::move(E);
-
-     switch (GetX86FPFlags(Instr)) {
-     case llvm::X86II::NotFP:
-       return UopsSnippetGenerator::generateCodeTemplates(Instr);
-     case llvm::X86II::ZeroArgFP:
-     case llvm::X86II::OneArgFP:
-     case llvm::X86II::SpecialFP:
-       return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
-     case llvm::X86II::OneArgFPRW:
-     case llvm::X86II::TwoArgFP:
-       // These are instructions like
-       //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
-       //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
-       // They are intrinsically serial and do not modify the state of the stack.
-       // We generate the same code for latency and uops.
-       return generateSelfAliasingCodeTemplates(Instr);
-     case llvm::X86II::CompareFP:
-     case llvm::X86II::CondMovFP:
-       // We can compute uops for any FP instruction that does not grow or shrink
-       // the stack (either do not touch the stack or push as much as they pop).
-       return generateUnconstrainedCodeTemplates(
-           Instr, "instruction does not grow/shrink the FP stack");
-     default:
-       llvm_unreachable("Unknown FP Type!");
-     }
-   }
+   generateCodeTemplates(const Instruction &Instr) const override;
};
+ } // namespace
+
+ llvm::Expected<std::vector<CodeTemplate>>
+ X86UopsSnippetGenerator::generateCodeTemplates(
+     const Instruction &Instr) const {
+   if (auto E = IsInvalidOpcode(Instr))
+     return std::move(E);
+
+   switch (getX86FPFlags(Instr)) {
+   case llvm::X86II::NotFP:
+     return UopsSnippetGenerator::generateCodeTemplates(Instr);
+   case llvm::X86II::ZeroArgFP:
+   case llvm::X86II::OneArgFP:
+   case llvm::X86II::SpecialFP:
+     return llvm::make_error<BenchmarkFailure>("Unsupported x87 Instruction");
+   case llvm::X86II::OneArgFPRW:
+   case llvm::X86II::TwoArgFP:
+     // These are instructions like
+     //  - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
+     //  - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
+     // They are intrinsically serial and do not modify the state of the stack.
+     // We generate the same code for latency and uops.
+     return generateSelfAliasingCodeTemplates(Instr);
+   case llvm::X86II::CompareFP:
+   case llvm::X86II::CondMovFP:
+     // We can compute uops for any FP instruction that does not grow or shrink
+     // the stack (either do not touch the stack or push as much as they pop).
+     return generateUnconstrainedCodeTemplates(
+         Instr, "instruction does not grow/shrink the FP stack");
+   default:
+     llvm_unreachable("Unknown FP Type!");
+   }
+ }

- static unsigned GetLoadImmediateOpcode(unsigned RegBitWidth) {
+ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return llvm::X86::MOV8ri;
@@ -259,7 +269,7 @@ static llvm::MCInst loadImmediate(unsigned Reg, unsigned RegBitWidth,
                                  const llvm::APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
- return llvm::MCInstBuilder(GetLoadImmediateOpcode(RegBitWidth))
+ return llvm::MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}
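
For reference, a minimal usage sketch of the helper above (a hypothetical call site, not part of the patch): loadImmediate picks the MOVri variant matching RegBitWidth (MOV8ri for 8 bits, and so on) via getLoadImmediateOpcode, then appends the destination register and the zero-extended immediate.

  // Hypothetical example: materialize the 32-bit constant 42 into EAX.
  // The resulting MCInst is equivalent to the assembly `movl $42, %eax`.
  const llvm::MCInst Inst =
      loadImmediate(llvm::X86::EAX, /*RegBitWidth=*/32, llvm::APInt(32, 42));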
@@ -308,181 +318,123 @@ static llvm::MCInst releaseStackSpace(unsigned Bytes) {

// Reserves some space on the stack, fills it with the content of the provided
// constant and provides methods to load the stack value into a register.
+ namespace {
struct ConstantInliner {
  explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}

  std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
-                                           unsigned Opcode) {
-     assert((RegBitWidth & 7) == 0 &&
-            "RegBitWidth must be a multiple of 8 bits");
-     initStack(RegBitWidth / 8);
-     add(loadToReg(Reg, Opcode));
-     add(releaseStackSpace(RegBitWidth / 8));
-     return std::move(Instructions);
-   }
+                                           unsigned Opcode);

-   std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg) {
-     initStack(kF80Bytes);
-     add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
-             // Address = ESP
-             .addReg(llvm::X86::RSP) // BaseReg
-             .addImm(1)              // ScaleAmt
-             .addReg(0)              // IndexReg
-             .addImm(0)              // Disp
-             .addReg(0));            // Segment
-     if (Reg != llvm::X86::ST0)
-       add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
-     add(releaseStackSpace(kF80Bytes));
-     return std::move(Instructions);
-   }
+   std::vector<llvm::MCInst> loadX87STAndFinalize(unsigned Reg);
329
339
- std::vector<llvm::MCInst> loadX87FPAndFinalize (unsigned Reg) {
340
- initStack (kF80Bytes );
341
- add (llvm::MCInstBuilder (llvm::X86::LD_Fp80m)
342
- .addReg (Reg)
343
- // Address = ESP
344
- .addReg (llvm::X86::RSP) // BaseReg
345
- .addImm (1 ) // ScaleAmt
346
- .addReg (0 ) // IndexReg
347
- .addImm (0 ) // Disp
348
- .addReg (0 )); // Segment
349
- add (releaseStackSpace (kF80Bytes ));
350
- return std::move (Instructions);
351
- }
330
+ std::vector<llvm::MCInst> loadX87FPAndFinalize (unsigned Reg);
352
331
353
- std::vector<llvm::MCInst> popFlagAndFinalize () {
354
- initStack (8 );
355
- add (llvm::MCInstBuilder (llvm::X86::POPF64));
356
- return std::move (Instructions);
357
- }
332
+ std::vector<llvm::MCInst> popFlagAndFinalize ();

private:
-   static constexpr const unsigned kF80Bytes = 10; // 80 bits.
-
  ConstantInliner &add(const llvm::MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

-   void initStack(unsigned Bytes) {
-     assert(Constant_.getBitWidth() <= Bytes * 8 &&
-            "Value does not have the correct size");
-     const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
-                                          ? Constant_.sext(Bytes * 8)
-                                          : Constant_;
-     add(allocateStackSpace(Bytes));
-     size_t ByteOffset = 0;
-     for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
-       add(fillStackSpace(
-           llvm::X86::MOV32mi, ByteOffset,
-           WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
-     if (Bytes - ByteOffset >= 2) {
-       add(fillStackSpace(
-           llvm::X86::MOV16mi, ByteOffset,
-           WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
-       ByteOffset += 2;
-     }
-     if (Bytes - ByteOffset >= 1)
-       add(fillStackSpace(
-           llvm::X86::MOV8mi, ByteOffset,
-           WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
-   }
+   void initStack(unsigned Bytes);
+
+   static constexpr const unsigned kF80Bytes = 10; // 80 bits.

  llvm::APInt Constant_;
  std::vector<llvm::MCInst> Instructions;
};
+ } // namespace
+
+ std::vector<llvm::MCInst> ConstantInliner::loadAndFinalize(unsigned Reg,
+                                                            unsigned RegBitWidth,
+                                                            unsigned Opcode) {
+   assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
+   initStack(RegBitWidth / 8);
+   add(loadToReg(Reg, Opcode));
+   add(releaseStackSpace(RegBitWidth / 8));
+   return std::move(Instructions);
+ }
+
+ std::vector<llvm::MCInst> ConstantInliner::loadX87STAndFinalize(unsigned Reg) {
+   initStack(kF80Bytes);
+   add(llvm::MCInstBuilder(llvm::X86::LD_F80m)
+           // Address = ESP
+           .addReg(llvm::X86::RSP) // BaseReg
+           .addImm(1)              // ScaleAmt
+           .addReg(0)              // IndexReg
+           .addImm(0)              // Disp
+           .addReg(0));            // Segment
+   if (Reg != llvm::X86::ST0)
+     add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
+   add(releaseStackSpace(kF80Bytes));
+   return std::move(Instructions);
+ }
+
+ std::vector<llvm::MCInst> ConstantInliner::loadX87FPAndFinalize(unsigned Reg) {
+   initStack(kF80Bytes);
+   add(llvm::MCInstBuilder(llvm::X86::LD_Fp80m)
+           .addReg(Reg)
+           // Address = ESP
+           .addReg(llvm::X86::RSP) // BaseReg
+           .addImm(1)              // ScaleAmt
+           .addReg(0)              // IndexReg
+           .addImm(0)              // Disp
+           .addReg(0));            // Segment
+   add(releaseStackSpace(kF80Bytes));
+   return std::move(Instructions);
+ }
+
+ std::vector<llvm::MCInst> ConstantInliner::popFlagAndFinalize() {
+   initStack(8);
+   add(llvm::MCInstBuilder(llvm::X86::POPF64));
+   return std::move(Instructions);
+ }
+
+ void ConstantInliner::initStack(unsigned Bytes) {
+   assert(Constant_.getBitWidth() <= Bytes * 8 &&
+          "Value does not have the correct size");
+   const llvm::APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
+                                        ? Constant_.sext(Bytes * 8)
+                                        : Constant_;
+   add(allocateStackSpace(Bytes));
+   size_t ByteOffset = 0;
+   for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
+     add(fillStackSpace(
+         llvm::X86::MOV32mi, ByteOffset,
+         WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
+   if (Bytes - ByteOffset >= 2) {
+     add(fillStackSpace(
+         llvm::X86::MOV16mi, ByteOffset,
+         WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
+     ByteOffset += 2;
+   }
+   if (Bytes - ByteOffset >= 1)
+     add(fillStackSpace(
+         llvm::X86::MOV8mi, ByteOffset,
+         WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
+ }
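
For illustration, a small worked sketch (not part of the patch) of how initStack's 4/2/1-byte chunking covers a buffer; for the 10-byte kF80Bytes case it emits two MOV32mi stores followed by one MOV16mi store:

  #include <cstdio>

  // Prints the store plan initStack would choose for a given byte count.
  int main() {
    unsigned Bytes = 10; // kF80Bytes: an 80-bit x87 constant
    unsigned Offset = 0;
    for (; Bytes - Offset >= 4; Offset += 4)
      std::printf("MOV32mi at [RSP + %u]\n", Offset); // offsets 0 and 4
    if (Bytes - Offset >= 2) {
      std::printf("MOV16mi at [RSP + %u]\n", Offset); // offset 8
      Offset += 2;
    }
    if (Bytes - Offset >= 1)
      std::printf("MOV8mi at [RSP + %u]\n", Offset); // not reached for 10 bytes
    return 0;
  }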

#include "X86GenExegesis.inc"

+ namespace {
class ExegesisX86Target : public ExegesisTarget {
public:
  ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}

private:
-   void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
-     // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
-     PM.add(llvm::createX86FloatingPointStackifierPass());
-   }
+   void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override;

-   unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
-     if (!TT.isArch64Bit()) {
-       // FIXME: This would require popping from the stack, so we would have to
-       // add some additional setup code.
-       return 0;
-     }
-     return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
-   }
+   unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override;

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
-                         unsigned Offset) const override {
-     assert(!isInvalidMemoryInstr(IT.Instr) &&
-            "fillMemoryOperands requires a valid memory instruction");
-     int MemOpIdx = X86II::getMemoryOperandNo(IT.Instr.Description->TSFlags);
-     assert(MemOpIdx >= 0 && "invalid memory operand index");
-     // getMemoryOperandNo() ignores tied operands, so we have to add them back.
-     for (unsigned I = 0; I <= static_cast<unsigned>(MemOpIdx); ++I) {
-       const auto &Op = IT.Instr.Operands[I];
-       if (Op.isTied() && Op.getTiedToIndex() < I) {
-         ++MemOpIdx;
-       }
-     }
-     // Now fill in the memory operands.
-     const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
-       const auto Op = IT.Instr.Operands[OpIdx];
-       assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern");
-       IT.getValueFor(Op) = OpVal;
-     };
-     SetOp(MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
-     SetOp(MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
-     SetOp(MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
-     SetOp(MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
-     SetOp(MemOpIdx + 4, MCOperand::createReg(0));      // Segment
-   }
+                         unsigned Offset) const override;

  std::vector<llvm::MCInst> setRegTo(const llvm::MCSubtargetInfo &STI,
                                     unsigned Reg,
-                                    const llvm::APInt &Value) const override {
-     if (llvm::X86::GR8RegClass.contains(Reg))
-       return {loadImmediate(Reg, 8, Value)};
-     if (llvm::X86::GR16RegClass.contains(Reg))
-       return {loadImmediate(Reg, 16, Value)};
-     if (llvm::X86::GR32RegClass.contains(Reg))
-       return {loadImmediate(Reg, 32, Value)};
-     if (llvm::X86::GR64RegClass.contains(Reg))
-       return {loadImmediate(Reg, 64, Value)};
-     ConstantInliner CI(Value);
-     if (llvm::X86::VR64RegClass.contains(Reg))
-       return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
-     if (llvm::X86::VR128XRegClass.contains(Reg)) {
-       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-         return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
-       if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
-         return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
-       return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
-     }
-     if (llvm::X86::VR256XRegClass.contains(Reg)) {
-       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-         return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
-       if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
-         return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
-     }
-     if (llvm::X86::VR512RegClass.contains(Reg))
-       if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
-         return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
-     if (llvm::X86::RSTRegClass.contains(Reg)) {
-       return CI.loadX87STAndFinalize(Reg);
-     }
-     if (llvm::X86::RFP32RegClass.contains(Reg) ||
-         llvm::X86::RFP64RegClass.contains(Reg) ||
-         llvm::X86::RFP80RegClass.contains(Reg)) {
-       return CI.loadX87FPAndFinalize(Reg);
-     }
-     if (Reg == llvm::X86::EFLAGS)
-       return CI.popFlagAndFinalize();
-     return {}; // Not yet implemented.
-   }
+                                    const llvm::APInt &Value) const override;

  std::unique_ptr<SnippetGenerator>
  createLatencySnippetGenerator(const LLVMState &State) const override {
@@ -498,9 +450,94 @@ class ExegesisX86Target : public ExegesisTarget {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }
};
-
} // namespace

+ void ExegesisX86Target::addTargetSpecificPasses(
+     llvm::PassManagerBase &PM) const {
+   // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
+   PM.add(llvm::createX86FloatingPointStackifierPass());
+ }
+
+ unsigned
+ ExegesisX86Target::getScratchMemoryRegister(const llvm::Triple &TT) const {
+   if (!TT.isArch64Bit()) {
+     // FIXME: This would require popping from the stack, so we would have to
+     // add some additional setup code.
+     return 0;
+   }
+   return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
+ }
+
+
471
+ void ExegesisX86Target::fillMemoryOperands (InstructionTemplate &IT,
472
+ unsigned Reg,
473
+ unsigned Offset) const {
474
+ assert (!isInvalidMemoryInstr (IT.Instr ) &&
475
+ " fillMemoryOperands requires a valid memory instruction" );
476
+ int MemOpIdx = X86II::getMemoryOperandNo (IT.Instr .Description ->TSFlags );
477
+ assert (MemOpIdx >= 0 && " invalid memory operand index" );
478
+ // getMemoryOperandNo() ignores tied operands, so we have to add them back.
479
+ for (unsigned I = 0 ; I <= static_cast <unsigned >(MemOpIdx); ++I) {
480
+ const auto &Op = IT.Instr .Operands [I];
481
+ if (Op.isTied () && Op.getTiedToIndex () < I) {
482
+ ++MemOpIdx;
483
+ }
484
+ }
485
+ // Now fill in the memory operands.
486
+ const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
487
+ const auto Op = IT.Instr .Operands [OpIdx];
488
+ assert (Op.isMemory () && Op.isExplicit () && " invalid memory pattern" );
489
+ IT.getValueFor (Op) = OpVal;
490
+ };
491
+ SetOp (MemOpIdx + 0 , MCOperand::createReg (Reg)); // BaseReg
492
+ SetOp (MemOpIdx + 1 , MCOperand::createImm (1 )); // ScaleAmt
493
+ SetOp (MemOpIdx + 2 , MCOperand::createReg (0 )); // IndexReg
494
+ SetOp (MemOpIdx + 3 , MCOperand::createImm (Offset)); // Disp
495
+ SetOp (MemOpIdx + 4 , MCOperand::createReg (0 )); // Segment
496
+ }
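
For readers less familiar with the MC layer, a minimal sketch (illustrative only; makeScratchLoad is a hypothetical helper, not from the patch) of the five-operand x86 memory reference [BaseReg + ScaleAmt * IndexReg + Disp] that fillMemoryOperands populates, here encoding a load from [RDI + Offset]:

  // Hypothetical helper showing the BaseReg/ScaleAmt/IndexReg/Disp/Segment
  // operand layout used for x86 memory references in MCInst form.
  static llvm::MCInst makeScratchLoad(unsigned Opcode, unsigned DestReg,
                                      unsigned Offset) {
    return llvm::MCInstBuilder(Opcode)
        .addReg(DestReg)
        .addReg(llvm::X86::RDI) // BaseReg (the 64-bit scratch register)
        .addImm(1)              // ScaleAmt
        .addReg(0)              // IndexReg (none)
        .addImm(Offset)         // Disp
        .addReg(0);             // Segment (default)
  }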
+
+ std::vector<llvm::MCInst>
+ ExegesisX86Target::setRegTo(const llvm::MCSubtargetInfo &STI, unsigned Reg,
+                             const llvm::APInt &Value) const {
+   if (llvm::X86::GR8RegClass.contains(Reg))
+     return {loadImmediate(Reg, 8, Value)};
+   if (llvm::X86::GR16RegClass.contains(Reg))
+     return {loadImmediate(Reg, 16, Value)};
+   if (llvm::X86::GR32RegClass.contains(Reg))
+     return {loadImmediate(Reg, 32, Value)};
+   if (llvm::X86::GR64RegClass.contains(Reg))
+     return {loadImmediate(Reg, 64, Value)};
+   ConstantInliner CI(Value);
+   if (llvm::X86::VR64RegClass.contains(Reg))
+     return CI.loadAndFinalize(Reg, 64, llvm::X86::MMX_MOVQ64rm);
+   if (llvm::X86::VR128XRegClass.contains(Reg)) {
+     if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+       return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQU32Z128rm);
+     if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
+       return CI.loadAndFinalize(Reg, 128, llvm::X86::VMOVDQUrm);
+     return CI.loadAndFinalize(Reg, 128, llvm::X86::MOVDQUrm);
+   }
+   if (llvm::X86::VR256XRegClass.contains(Reg)) {
+     if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+       return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQU32Z256rm);
+     if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
+       return CI.loadAndFinalize(Reg, 256, llvm::X86::VMOVDQUYrm);
+   }
+   if (llvm::X86::VR512RegClass.contains(Reg))
+     if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
+       return CI.loadAndFinalize(Reg, 512, llvm::X86::VMOVDQU32Zrm);
+   if (llvm::X86::RSTRegClass.contains(Reg)) {
+     return CI.loadX87STAndFinalize(Reg);
+   }
+   if (llvm::X86::RFP32RegClass.contains(Reg) ||
+       llvm::X86::RFP64RegClass.contains(Reg) ||
+       llvm::X86::RFP80RegClass.contains(Reg)) {
+     return CI.loadX87FPAndFinalize(Reg);
+   }
+   if (Reg == llvm::X86::EFLAGS)
+     return CI.popFlagAndFinalize();
+   return {}; // Not yet implemented.
+ }
+
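
A minimal usage sketch of the ConstantInliner path above (hypothetical call site; assumes a target with SSE2 but no AVX, so MOVDQUrm is the opcode chosen by setRegTo):

  // Hypothetical: materialize a 128-bit constant into XMM0 without AVX.
  llvm::APInt Value(128, 0x0123456789abcdefULL); // low 64 bits set
  ConstantInliner CI(Value);
  const std::vector<llvm::MCInst> Code =
      CI.loadAndFinalize(llvm::X86::XMM0, 128, llvm::X86::MOVDQUrm);
  // Code now allocates stack space, stores the constant byte chunks,
  // loads XMM0 from the stack, and releases the stack space.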
static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;