ROSE  0.9.6a
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AssemblerX86.h
Go to the documentation of this file.
1 /* Assembly specific to the x86 architecture. */
2 
3 #ifndef ROSE_ASSEMBLER_X86_H
4 #define ROSE_ASSEMBLER_X86_H
5 
6 #include "Assembler.h"
7 
8 //#include "sage3.h"
9 
26 class AssemblerX86: public Assembler {
27 public:
29  : honor_operand_types(false) {
30  if (defns.size()==0)
32  }
33 
34  virtual ~AssemblerX86() {}
35 
38 
44  void set_honor_operand_types(bool b) {
46  }
47 
50  bool get_honor_operand_types() const {
51  return honor_operand_types;
52  }
53 
55  virtual SgUnsignedCharList assembleProgram(const std::string &source);
56 
57  /*========================================================================================================================
58  * Members for defining instructions.
59  *========================================================================================================================*/
60 private:
61  /* These bit masks specify how the opcode part of the encoding is generated. The base opcode bytes are specified with
62  * a 64-bit value so that up to eight bytes of opcode can be specified. The bytes generated come from the 64-bit opcode
63  * value in big-endian order but without leading zero bytes. If the 64-bit opcode is zero then a single zero byte is
64  * generated. For instance, the MONITOR instruction has an opcode of 0x0f01c8, generating the encoding 0x0f, 0x01, 0xc8. */
65 
/* En opcode modifier. Bits 4-6 carry the numeric value n and bit 7 is set whenever an En modification was specified,
 * which is what distinguishes od_e0 from the absence of any En modifier. */
static const unsigned od_e_mask = 0x00000070;       /* mask for numeric value (n) part of En field. */
static const unsigned od_e_pres = 0x00000080;       /* bit set if En modification was specified. */
static const unsigned od_e0 = (0u << 4) | od_e_pres;
static const unsigned od_e1 = (1u << 4) | od_e_pres;
static const unsigned od_e2 = (2u << 4) | od_e_pres;
static const unsigned od_e3 = (3u << 4) | od_e_pres;
static const unsigned od_e4 = (4u << 4) | od_e_pres;
static const unsigned od_e5 = (5u << 4) | od_e_pres;
static const unsigned od_e6 = (6u << 4) | od_e_pres;
static const unsigned od_e7 = (7u << 4) | od_e_pres;

/* Extracts the numeric value n (0 through 7) of the En field from a set of opcode modifiers. The od_e_pres bit is not
 * consulted, so modifiers without an En field yield zero just like od_e0 does. */
static size_t od_e_val(unsigned opcode_mods) {
    const unsigned n = (opcode_mods & od_e_mask) >> 4;
    return n;
}
/* REX opcode modifier. Bit 0 records that a REX prefix is present and bits 8-11 hold the low nibble of the REX byte.
 * The letters following "od_rex" name the nibble bits that are set: w=0x800, r=0x400, x=0x200, b=0x100. Every
 * multi-letter constant is the bitwise OR of the corresponding single-letter constants. */
static const unsigned od_rex_pres = 0x00000001;     /* bit set if REX prefix is present. */
static const unsigned od_rex_mask = 0x00000f00;     /* mask for low nibble of REX byte. */
static const unsigned od_rex      = od_rex_pres;
static const unsigned od_rexb     = 0x00000100 | od_rex_pres;
static const unsigned od_rexx     = 0x00000200 | od_rex_pres;
static const unsigned od_rexxb    = od_rexx | od_rexb;
static const unsigned od_rexr     = 0x00000400 | od_rex_pres;
static const unsigned od_rexrb    = od_rexr | od_rexb;
static const unsigned od_rexrx    = od_rexr | od_rexx;
static const unsigned od_rexrxb   = od_rexr | od_rexx | od_rexb;
static const unsigned od_rexw     = 0x00000800 | od_rex_pres;
static const unsigned od_rexwb    = od_rexw | od_rexb;
static const unsigned od_rexwx    = od_rexw | od_rexx;
static const unsigned od_rexwxb   = od_rexw | od_rexx | od_rexb;
static const unsigned od_rexwr    = od_rexw | od_rexr;
static const unsigned od_rexwrb   = od_rexw | od_rexr | od_rexb;
static const unsigned od_rexwrx   = od_rexw | od_rexr | od_rexx;
static const unsigned od_rexwrxb  = od_rexw | od_rexr | od_rexx | od_rexb;

/* Builds a REX byte from opcode modifiers: the constant high nibble 0x4 combined with the low nibble stored in bits
 * 8-11 of the modifiers. The od_rex_pres bit is not consulted; callers decide whether the byte is emitted at all. */
static uint8_t od_rex_byte(unsigned opcode_mods) {
    const unsigned low_nibble = (opcode_mods & od_rex_mask) >> 8;
    return static_cast<uint8_t>(0x40 | low_nibble);
}
103 
106  static const unsigned od_modrm = 0x00000002;
107 
110  static const unsigned od_c_mask = 0x00007000;
111  static const unsigned od_cb = 0x00001000;
112  static const unsigned od_cw = 0x00002000;
113  static const unsigned od_cd = 0x00003000;
114  static const unsigned od_cp = 0x00004000;
115  static const unsigned od_co = 0x00005000;
116  static const unsigned od_ct = 0x00006000;
117 
120  static const unsigned od_i_mask = 0x00070000;
121  static const unsigned od_ib = 0x00010000;
122  static const unsigned od_iw = 0x00020000;
123  static const unsigned od_id = 0x00030000;
124  static const unsigned od_io = 0x00040000;
125 
128  static const unsigned od_r_mask = 0x00700000;
129  static const unsigned od_rb = 0x00100000;
130  static const unsigned od_rw = 0x00200000;
131  static const unsigned od_rd = 0x00300000;
132  static const unsigned od_ro = 0x00400000;
133 
137  static const unsigned od_i = 0x00000004;
138 
139 
142  {
299  /* The following are not documented in section 3.1.1.2 but are used elsewhere in the manual */
321  };
322 
324  static const unsigned COMPAT_LEGACY = 0x01;
325  static const unsigned COMPAT_64 = 0x02;
/** Packs the three ModR/M fields into a single byte: @p mod in bits 7-6, @p reg in bits 5-3, and @p rm in bits 2-0.
 *  Each argument is masked to its field width, so excess high-order bits are silently discarded. */
static uint8_t build_modrm(unsigned mod, unsigned reg, unsigned rm) {
    const unsigned mod_bits = (mod & 0x3) << 6;
    const unsigned reg_bits = (reg & 0x7) << 3;
    const unsigned rm_bits = rm & 0x7;
    return static_cast<uint8_t>(mod_bits | reg_bits | rm_bits);
}
331 
/** Returns the mod field (bits 7-6) of a ModR/M byte. */
static unsigned modrm_mod(uint8_t modrm) {
    return static_cast<unsigned>(modrm) >> 6;
}
334 
/** Returns the reg field (bits 5-3) of a ModR/M byte. */
static unsigned modrm_reg(uint8_t modrm) {
    const unsigned shifted = static_cast<unsigned>(modrm) >> 3;
    return shifted & 0x7;
}
337 
/** Returns the rm field (bits 2-0) of a ModR/M byte. */
static unsigned modrm_rm(uint8_t modrm) {
    return static_cast<unsigned>(modrm) & 0x7;
}
340 
/** Packs the three SIB fields into a single byte: @p ss in bits 7-6, @p index in bits 5-3, and @p base in bits 2-0.
 *  Each argument is masked to its field width, so excess high-order bits are silently discarded. */
static uint8_t build_sib(unsigned ss, unsigned index, unsigned base) {
    const unsigned ss_bits = (ss & 0x3) << 6;
    const unsigned index_bits = (index & 0x7) << 3;
    const unsigned base_bits = base & 0x7;
    return static_cast<uint8_t>(ss_bits | index_bits | base_bits);
}
345 
/** Returns the ss field (bits 7-6) of a SIB byte. */
static unsigned sib_ss(uint8_t sib) {
    return static_cast<unsigned>(sib) >> 6;
}
348 
/** Returns the index field (bits 5-3) of a SIB byte. */
static unsigned sib_index(uint8_t sib) {
    const unsigned shifted = static_cast<unsigned>(sib) >> 3;
    return shifted & 0x7;
}
351 
/** Returns the base field (bits 2-0) of a SIB byte. */
static unsigned sib_base(uint8_t sib) {
    return static_cast<unsigned>(sib) & 0x7;
}
354 
358  class InsnDefn {
359  public:
360  InsnDefn(const std::string &mnemonic, X86InstructionKind kind, unsigned compatibility, uint64_t opcode,
362  OperandDefn op4=od_none)
363  : mnemonic(mnemonic), kind(kind), compatibility(compatibility), opcode(opcode), opcode_modifiers(opcode_modifiers) {
364  if (op1) operands.push_back(op1);
365  if (op2) operands.push_back(op2);
366  if (op3) operands.push_back(op3);
367  if (op4) operands.push_back(op4);
368  }
369  std::string to_str() const;
370  void set_location(const std::string &s) {
371  location = s;
372  }
373  std::string mnemonic;
375  unsigned compatibility;
376  uint64_t opcode;
378  std::vector<OperandDefn> operands;
379  std::string location; /* location of instruction documentation */
380  };
381 
383  {
385  mrp_disp, /* displacement */
386  mrp_index, /* register*scale */
387  mrp_index_disp, /* register*scale + displacement */
388  mrp_base, /* register */
389  mrp_base_disp, /* register + displacement */
390  mrp_base_index, /* register + register*scale */
391  mrp_base_index_disp /* register + register*scale + displacement */
392  };
393 
395  typedef std::vector<const InsnDefn*> DictionaryPage;
396 
398  typedef std::map<X86InstructionKind, DictionaryPage> InsnDictionary;
399 
401  static void initAssemblyRules();
402  static void initAssemblyRules_part1();
403  static void initAssemblyRules_part2();
404  static void initAssemblyRules_part3();
405  static void initAssemblyRules_part4();
406  static void initAssemblyRules_part5();
407  static void initAssemblyRules_part6();
408  static void initAssemblyRules_part7();
409  static void initAssemblyRules_part8();
410  static void initAssemblyRules_part9();
411 
413  static void define(const InsnDefn *d) {
414  defns[d->kind].push_back(d);
415  }
416 
420  static std::string to_str(X86InstructionKind);
421 
428 
431  SgUnsignedCharList assemble(SgAsmx86Instruction *insn, const InsnDefn *defn);
432 
436  void matches(const InsnDefn *defn, SgAsmx86Instruction *insn, int64_t *disp, int64_t *imm) const;
437 
440  bool matches(OperandDefn, SgAsmExpression*, SgAsmInstruction*, int64_t *disp, int64_t *imm) const;
441 
443  static bool matches_rel(SgAsmInstruction*, int64_t val, size_t nbytes);
444 
449  SgAsmx86RegisterReferenceExpression **base_reg/*out*/,
450  SgAsmx86RegisterReferenceExpression **index_reg/*out*/,
451  SgAsmValueExpression **scale/*out*/, SgAsmValueExpression **displacement/*out*/);
452 
454  uint8_t build_modrm(const InsnDefn*, SgAsmx86Instruction*, size_t argno,
455  uint8_t *sib, int64_t *displacement, uint8_t *rex) const;
456 
458  void build_modreg(const InsnDefn*, SgAsmx86Instruction*, size_t argno, uint8_t *modrm, uint8_t *rex) const;
459 
463 
466 };
467 
468 #endif