ROSE  0.9.6a
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Disassembler.C
Go to the documentation of this file.
1 #include "sage3basic.h"
2 #include "Assembler.h"
3 #include "AssemblerX86.h"
4 #include "AsmUnparser_compat.h"
5 #include "Disassembler.h"
6 #include "DisassemblerPowerpc.h"
7 #include "DisassemblerArm.h"
8 #include "DisassemblerMips.h"
9 #include "DisassemblerX86.h"
10 #include "BinaryLoader.h"
11 #include "Partitioner.h"
12 
13 #include <stdarg.h>
14 
15 /* See header file for full documentation of all methods in this file. */
16 
17 
18 /* Mutex for class-wide operations (such as adjusting Disassembler::disassemblers) */
20 
21 /* List of disassembler subclasses (protect with class_mutex) */
22 std::vector<Disassembler*> Disassembler::disassemblers;
23 
24 /* Hook for construction */
26 #if 0
27  p_debug = stderr;
28 #endif
29 }
30 
31 void
32 Disassembler::Exception::print(std::ostream &o) const
33 {
34  if (insn) {
35  o <<"disassembly failed at " <<StringUtility::addrToString(ip)
36  <<" [" <<unparseInstruction(insn) <<"]"
37  <<": " <<what();
38  } else if (ip>0) {
39  o <<"disassembly failed at " <<StringUtility::addrToString(ip);
40  if (!bytes.empty()) {
41  for (size_t i=0; i<bytes.size(); i++) {
42  o <<(i>0?", ":"[")
43  <<std::hex <<std::setfill('0') <<std::setw(2)
44  <<"0x" <<bytes[i]
45  <<std::dec <<std::setfill(' ') <<std::setw(1);
46  }
47  o <<"] at bit " <<bit;
48  }
49  } else {
50  o <<what();
51  }
52 }
53 
54 std::ostream &
55 operator<<(std::ostream &o, const Disassembler::Exception &e)
56 {
57  e.print(o);
58  return o;
59 }
60 
61 unsigned
62 Disassembler::parse_switches(const std::string &s, unsigned flags)
63 {
64  size_t at=0;
65  while (at<s.size()) {
66  enum { SET_BIT, CLEAR_BIT, SET_VALUE, NOT_SPECIFIED } howset = NOT_SPECIFIED;
67 
68  if (s[at]=='-') {
69  howset = CLEAR_BIT;
70  at++;
71  } else if (s[at]=='+') {
72  howset = SET_BIT;
73  at++;
74  } else if (s[at]=='=') {
75  howset = SET_VALUE;
76  at++;
77  }
78  if (at>=s.size())
79  throw Exception("heuristic name must follow qualifier");
80 
81 
82  size_t comma = s.find(",", at);
83  std::string word = std::string(s, at, comma-at);
84  if (word.size()==0)
85  throw Exception("heuristic name must follow comma");
86 
87  unsigned bits = 0;
88  if (word == "following") {
89  bits = SEARCH_FOLLOWING;
90  } else if (word == "immediate") {
91  bits = SEARCH_IMMEDIATE;
92  } else if (word == "words") {
93  bits = SEARCH_WORDS;
94  } else if (word == "allbytes") {
95  bits = SEARCH_ALLBYTES;
96  } else if (word == "unused") {
97  bits = SEARCH_UNUSED;
98  } else if (word == "nonexe") {
99  bits = SEARCH_NONEXE;
100  } else if (word == "deadend") {
101  bits = SEARCH_DEADEND;
102  } else if (word == "unknown") {
103  bits = SEARCH_UNKNOWN;
104  } else if (word == "funcsyms") {
105  bits = SEARCH_FUNCSYMS;
106  } else if (word == "default") {
107  bits = SEARCH_DEFAULT;
108  if (howset==NOT_SPECIFIED) howset = SET_VALUE;
109  } else if (isdigit(word[0])) {
110  bits = strtol(word.c_str(), NULL, 0);
111  } else {
112  throw Exception("unknown disassembler heuristic: " + word);
113  }
114 
115  switch (howset) {
116  case SET_VALUE:
117  flags = 0;
118  case NOT_SPECIFIED:
119  case SET_BIT:
120  flags |= bits;
121  break;
122  case CLEAR_BIT:
123  flags &= ~bits;
124  break;
125  }
126 
127  at = comma==std::string::npos ? s.size() : comma+1;
128  }
129  return flags;
130 }
131 
132 /* Initialize the class. Thread safe. */
133 void
135 {
140  register_subclass(new DisassemblerX86(2)); /*16-bit*/
141  register_subclass(new DisassemblerX86(4)); /*32-bit*/
142  register_subclass(new DisassemblerX86(8)); /*64-bit*/
143  } RTS_INIT_END;
144 }
145 
146 /* Class method to register a new disassembler subclass. Thread safe. */
147 void
149 {
150  initclass();
152  ROSE_ASSERT(factory!=NULL);
153  disassemblers.push_back(factory);
154  } RTS_MUTEX_END;
155 }
156 
157 /* Class method. Thread safe by virtue of lookup(SgAsmGenericHeader*). */
158 Disassembler *
160 {
161  Disassembler *retval=NULL;
162  const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
163  for (size_t i=0; i<headers.size(); i++) {
164  Disassembler *candidate = lookup(headers[i]);
165  if (retval && retval!=candidate)
166  throw Exception("interpretation has multiple disassemblers");
167  retval = candidate;
168  }
169  return retval;
170 }
171 
172 /* Class method. Thread safe. */
173 Disassembler *
175 {
176  initclass();
177  Disassembler *retval = NULL;
178 
180  for (size_t i=disassemblers.size(); i>0 && !retval; --i) {
181  if (disassemblers[i-1]->can_disassemble(header))
182  retval = disassemblers[i-1];
183  }
184  } RTS_MUTEX_END;
185 
186  if (retval)
187  return retval;
188  throw Exception("no disassembler for architecture");
189 }
190 
191 /* High-level function for disassembling a whole interpretation. */
192 void
194 {
195  InstructionMap insns = disassembleInterp(interp, successors, bad);
197  if (p_debug && !p_partitioner)
198  p->set_debug(get_debug());
199  SgAsmBlock *top = p->partition(interp, insns, interp->get_map());
200  interp->set_global_block(top);
201  top->set_parent(interp);
202  if (!p_partitioner)
203  delete p;
204 }
205 
206 /* Class method for backward compatibility with the old Disassembler name space.
207  * Not thread safe because Partitioner::Partitioner is not. */
208 void
210 {
211  /* Create a new disassembler so we can modify its behavior locally. */
212  Disassembler *disassembler = Disassembler::lookup(interp);
213  assert(disassembler);
214  disassembler = disassembler->clone();
215  assert(disassembler);
216 
217  /* Search methods specified with "-rose:disassembler_search" are stored in the SgFile object. Use them rather than the
218  * defaults built into the Disassembler class. */
219  SgNode *file = SageInterface::getEnclosingNode<SgFile>(interp);
220  ROSE_ASSERT(file);
221  disassembler->set_search(isSgFile(file)->get_disassemblerSearchHeuristics());
222 
223  /* Partitioning methods are specified with "-rose:partitioner_search" and are stored in SgFile also. Use them rather than
224  * the default partitioner. */
225  Partitioner *partitioner = new Partitioner;
226  partitioner->set_search(isSgFile(file)->get_partitionerSearchHeuristics());
227 
228  /* Partitioner configuration file specified with "-rose:partitioner_config" is stored in SgFile. Use it rather than
229  * the default configuration file. */
230  partitioner->load_config(isSgFile(file)->get_partitionerConfigurationFileName());
231 
232  disassembler->set_partitioner(partitioner);
233  disassembler->disassemble(interp, NULL, NULL);
234 
235  delete disassembler;
236  delete partitioner;
237 }
238 
239 /* Accessor */
240 void
242 {
243  ROSE_ASSERT(n>0);
244  ROSE_ASSERT(n<=sizeof(rose_addr_t));
245  p_wordsize = n;
246 }
247 
248 /* Accessor */
249 void
251 {
252 #ifndef NDEBUG
253  int nbits=0;
254  for (size_t i=0; i<8*sizeof(n); i++)
255  nbits += (((size_t)1<<i) & n) ? 1 : 0;
256  ROSE_ASSERT(1==nbits);
257 #endif
258  p_alignment = n;
259 }
260 
261 /* Progress report class variables, all protected by class_mutex */
263 time_t Disassembler::progress_time = 0;
264 FILE *Disassembler::progress_file = stderr;
265 
266 /* Set progress reporting values. */
267 void
268 Disassembler::set_progress_reporting(FILE *output, unsigned min_interval)
269 {
272  progress_interval = min_interval;
273  } RTS_MUTEX_END;
274 }
275 
276 /* Produce a progress report if enabled. */
277 void
278 Disassembler::progress(FILE *debug, const char *fmt, ...) const
279 {
280  va_list ap;
281  va_start(ap, fmt);
282 
283  time_t now = time(NULL);
284 
286  if (0==progress_time)
287  progress_time = now;
288 
289  if (progress_file!=NULL && now-progress_time >= progress_interval) {
290  progress_time = now;
291  vfprintf(progress_file, fmt, ap);
292  }
293 
294  if (debug!=NULL)
295  vfprintf(debug, fmt, ap);
296  } RTS_MUTEX_END;
297 
298  va_end(ap);
299 }
300 
301 /* Update progress, keeping track of the number of instructions disassembled. */
302 void
304 {
305  if (insn)
306  p_ndisassembled++;
307 
308  progress(p_debug, "Disassembler[va 0x%08"PRIx64"]: disassembled %zu instructions\n",
309  insn?insn->get_address():(uint64_t)0, p_ndisassembled);
310 }
311 
312 /* Disassemble one instruction. */
314 Disassembler::disassembleOne(const unsigned char *buf, rose_addr_t buf_va, size_t buf_size, rose_addr_t start_va,
315  AddressSet *successors)
316 {
318  MemoryMap::Segment segment(buffer, 0, MemoryMap::MM_PROT_RX, "disassembleOne temp");
319  MemoryMap map;
320  map.insert(Extent(buf_va, buf_size), segment);
321  return disassembleOne(&map, start_va, successors);
322 }
323 
324 /* Disassemble one basic block. */
327 {
328  InstructionMap insns;
329  SgAsmInstruction *insn;
330  rose_addr_t va=0, next_va=start_va;
331 
332  if (p_debug)
333  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: disassembling basic block\n", start_va);
334 
335  do { /*tail recursion*/
336 
337  /* Disassemble each instruction of what we naively consider to be a basic block (semantic analysis may prove
338  * otherwise). This loop exits locally if we reach an address that cannot be disassembled (and we're not calling
339  * make_unknown_instruction()) or we reach an instruction that naively terminates a basic block. In the former case,
340  * INSN will be the last instruction, VA is its virtual address, and NEXT_VA is the address of the following
341  * instruction; otherwise INSN is null, VA is the address where disassembly failed, and NEXT_VA is meaningless. */
342  while (1) {
343  va = next_va;
344  insn = cache ? cache->get_value_or(va, NULL) : NULL;
345 
346  try {
347  if (!insn) {
348  insn = disassembleOne(map, va, NULL);
349  if (cache)
350  cache->insert(std::make_pair(va, insn));
351  }
352  } catch(const Exception &e) {
353  if ((p_search & SEARCH_UNKNOWN) && e.bytes.size()>0) {
354  insn = make_unknown_instruction(e);
355  if (cache)
356  cache->insert(std::make_pair(va, insn));
357  } else {
358  if (cache)
359  cache->insert(std::make_pair(va, (SgAsmInstruction*)0));
360  if (insns.size()==0 || !(p_search & SEARCH_DEADEND)) {
361  if (p_debug)
362  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: "
363  "disassembly failed in basic block 0x%08"PRIx64": %s\n",
364  e.ip, start_va, e.what());
365  if (!cache) {
366  for (InstructionMap::iterator ii=insns.begin(); ii!=insns.end(); ++ii)
367  SageInterface::deleteAST(ii->second);
368  }
369  throw;
370  }
371  /* Terminate tail recursion. Make sure we don't try to disassemble here again within this call, even if
372  * semantic analysis can prove that the next instruction address is the only possible successor. */
373  insn = NULL;
374  break;
375  }
376  }
377  assert(insn!=NULL);
378  next_va = va + insn->get_size();
379  insns.insert(std::make_pair(va, insn));
380 
381  /* Is this the end of a basic block? This is naive logic that bases the decision only on the single instruction.
382  * A more thorough analysis can be performed below in the get_block_successors() call. */
383  if (insn->terminates_basic_block()) {
384  if (p_debug)
385  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: \"%s\" at 0x%08"PRIx64" naively terminates block\n",
386  start_va, unparseMnemonic(insn).c_str(), va);
387  break;
388  }
389  }
390 
391  /* Try to figure out the successor addresses. If we can prove that the only successor is the address following the
392  * last instruction then we can continue disassembling as if this were a single basic block. */
393  bool complete=false;
394  AddressSet suc = get_block_successors(insns, &complete);
395  if (insn && complete && suc.size()==1 && *(suc.begin())==next_va) {
396  if (p_debug) {
397  fprintf(p_debug,
398  "Disassembler[va 0x%08"PRIx64"]: semantic analysis proves basic block continues after 0x%08"PRIx64"\n",
399  start_va, va);
400  }
401  } else {
402  insn = NULL; /*terminate recursion*/
403  }
404 
405  /* Save block successors in return value before we exit scope */
406  if (!insn && successors) {
407  successors->insert(suc.begin(), suc.end());
408  if (p_debug) {
409  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: basic block successors:", start_va);
410  for (AddressSet::iterator si=suc.begin(); si!=suc.end(); si++)
411  fprintf(p_debug, " 0x%08"PRIx64, *si);
412  fprintf(p_debug, "\n");
413  }
414  }
415  } while (insn);
416  return insns;
417 }
418 
419 /* Disassemble one basic block. */
421 Disassembler::disassembleBlock(const unsigned char *buf, rose_addr_t buf_va, size_t buf_size, rose_addr_t start_va,
422  AddressSet *successors, InstructionMap *cache)
423 {
425  MemoryMap::Segment segment(buffer, 0, MemoryMap::MM_PROT_RX, "disassembleBlock temp");
426  MemoryMap map;
427  map.insert(Extent(buf_va, buf_size), segment);
428  return disassembleBlock(&map, start_va, successors, cache);
429 }
430 
431 /* Disassemble reachable instructions from a buffer */
433 Disassembler::disassembleBuffer(const MemoryMap *map, size_t start_va, AddressSet *successors, BadMap *bad)
434 {
435  AddressSet worklist;
436  worklist.insert(start_va);
437  return disassembleBuffer(map, worklist, successors, bad);
438 }
439 
440 /* Disassemble reachable instructions from a buffer */
442 Disassembler::disassembleBuffer(const MemoryMap *map, AddressSet worklist, AddressSet *successors, BadMap *bad)
443 {
444  InstructionMap insns;
445 
446  // Helps speed up disassembleBlock() when SEARCH_DEADEND is disabled. The destructor deletes all the instruction ASTs,
447  // so call InstructionCache.clear() first if you want to keep them. Doing it this way (rather than deleting them in
448  // an exception handler) allows for better debugging--gdb will show you where the exception occurred rather than where we
449  // re-throw it.
450  struct InstructionCache: InstructionMap {
451  ~InstructionCache() {
452  for (iterator ii=begin(); ii!=end(); ++ii)
453  SageInterface::deleteAST(ii->second);
454  }
455  } icache;
456 
457  rose_addr_t next_search = 0;
458 
459  /* Per-buffer search methods */
460  if (p_search & SEARCH_WORDS)
461  search_words(&worklist, map, icache);
462 
463  /* Look for more addresses */
464  if (worklist.size()==0 && (p_search & (SEARCH_ALLBYTES|SEARCH_UNUSED))) {
465  bool avoid_overlap = (p_search & SEARCH_UNUSED) ? true : false;
466  search_next_address(&worklist, next_search, map, insns, icache, avoid_overlap);
467  if (worklist.size()>0)
468  next_search = *(--worklist.end())+1;
469  }
470 
471  while (worklist.size()>0) {
472  /* Get next address to disassemble */
473  AddressSet::iterator i = worklist.begin();
474  rose_addr_t va = *i;
475  worklist.erase(i);
476 
477  if (insns.find(va)!=insns.end() || (bad && bad->find(va)!=bad->end())) {
478  /* Skip this if we've already tried to disassemble it. */
479  } else if (!map->exists(va)) {
480  /* Any address that's outside the range we're allowed to work on will be added to the successors. */
481  if (successors)
482  successors->insert(va);
483  } else {
484  /* Disassemble a basic block and add successors to the work list. If a disassembly error occurs then
485  * disassembleBlock() will throw an exception that we'll add to the bad list. We must be careful when adding the
486  * basic block's instructions to the return value: although we check above to prevent disassembling the same
487  * basic block more than once, it's still possible that two basic blocks could overlap (e.g., block A could start
488  * at the second instruction of block B, or on a variable-size instruction architecture, block A could start
489  * between instructions of block B and then become synchronized with B). */
490  InstructionMap bb;
491  try {
492  bb = disassembleBlock(map, va, &worklist, &icache);
493  insns.insert(bb.begin(), bb.end()); /*not inserted if already existing*/
494  assert(icache.exists(va));
495  } catch(const Exception &e) {
496  if (bad)
497  bad->insert(std::make_pair(va, e));
498  }
499 
500  /* Per-basicblock search methods */
502  search_following(&worklist, bb, va, map, icache);
504  search_immediate(&worklist, bb, map, icache);
505  }
506 
507  /* Look for more addresses */
508  if (worklist.size()==0 && (p_search & (SEARCH_ALLBYTES|SEARCH_UNUSED))) {
509  bool avoid_overlap = (p_search & SEARCH_UNUSED) ? true : false;
510  search_next_address(&worklist, next_search, map, insns, icache, avoid_overlap);
511  if (worklist.size()>0)
512  next_search = *(--worklist.end())+1;
513  }
514  }
515 
516  icache.clear(); // don't let the destructor delete the instructions
517  return insns;
518 }
519 
520 /* Add basic block following address to work list. */
521 void
523  const InstructionMap &tried)
524 {
525  rose_addr_t following_va = 0;
526  if (bb.empty()) {
527  following_va = bb_va+1;
528  } else {
529  InstructionMap::const_iterator bbi = bb.end();
530  --bbi;
531  SgAsmInstruction *last_insn = bbi->second;
532  following_va = last_insn->get_address() + last_insn->get_size();
533  }
534 
535  if (map->exists(following_va) && !tried.exists(following_va)) {
536  if (p_debug && worklist->find(following_va)==worklist->end()) {
537  rose_addr_t va = bb.begin()->first;
538  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: SEARCH_FOLLOWING added 0x%08"PRIx64"\n", va, following_va);
539  }
540  worklist->insert(following_va);
541  }
542 }
543 
544 /* Add values of immediate operands to work list */
545 void
546 Disassembler::search_immediate(AddressSet *worklist, const InstructionMap &bb, const MemoryMap *map, const InstructionMap &tried)
547 {
548  for (InstructionMap::const_iterator bbi=bb.begin(); bbi!=bb.end(); bbi++) {
549  const std::vector<SgAsmExpression*> &operands = bbi->second->get_operandList()->get_operands();
550  for (size_t i=0; i<operands.size(); i++) {
551  uint64_t constant=0;
553  size_t nbits = ival->get_significant_bits();
554  if (nbits!=16 && nbits!=32 && nbits!=64)
555  continue; /* Not an appropriately-sized constant */
556  constant = ival->get_value();
557  }
558  if (map->exists(constant) && !tried.exists(constant)) {
559  if (p_debug && worklist->find(constant)==worklist->end())
560  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: SEARCH_IMMEDIATE added 0x%08"PRIx64"\n",
561  bbi->first, constant);
562  worklist->insert(constant);
563  }
564  }
565  }
566 }
567 
568 /* Add word-aligned values to work list */
569 void
570 Disassembler::search_words(AddressSet *worklist, const MemoryMap *map, const InstructionMap &tried)
571 {
572  // Predicate is used only for its side effects
573  struct Visitor: public MemoryMap::Visitor {
574  Disassembler *d;
575  AddressSet *worklist;
576  const InstructionMap &tried;
577  Visitor(Disassembler *d, AddressSet *worklist, const InstructionMap &tried): d(d), worklist(worklist), tried(tried) {}
578  virtual bool operator()(const MemoryMap *map, const Extent &range, const MemoryMap::Segment &segment) {
579  rose_addr_t va = range.first();
580  va = ALIGN_UP(va, d->get_alignment());
581 
582  /* Scan through this segment */
583  while (va+d->get_wordsize() <= range.last()) {
584  rose_addr_t constant = 0; /*virtual address*/
585  unsigned char buf[sizeof constant];
586  assert(d->get_wordsize()<=sizeof constant);
587  if (map->read1(buf, va, d->get_wordsize())<d->get_wordsize())
588  break; /*shouldn't happen since we checked sizes above*/
589 
590  for (size_t i=0; i<d->get_wordsize(); i++) {
591  switch (d->get_sex()) {
593  constant |= buf[i] << (8*i);
594  break;
596  constant |= buf[i] << (8*(d->get_wordsize()-(i+1)));
597  break;
598  default:
599  ROSE_ASSERT(!"not implemented");
600  }
601  }
602  if (map->exists(constant) && !tried.exists(constant)) {
603  if (d->get_debug() && worklist->find(constant)==worklist->end())
604  fprintf(d->get_debug(), "Disassembler[va 0x%08"PRIx64"]: SEARCH_WORD added 0x%08"PRIx64"\n", va, constant);
605  worklist->insert(constant);
606  }
607  va += d->get_alignment();
608  }
609  return true;
610  }
611  } visitor(this, worklist, tried);
612  map->traverse(visitor);
613 }
614 
615 /* Find next unused address. */
616 void
618  const InstructionMap &insns, const InstructionMap &tried, bool avoid_overlap)
619 {
620  /* Assume a maximum instruction size so that while we search backward (by virtual address) through previously
621  * disassembled instructions we don't have to go all the way to the beginning of the instruction map to prove that an
622  * instruction doesn't overlap with a specified address. */
623  rose_addr_t next_va = start_va;
624 
625  while (1) {
626 
627  /* Advance to the next valid mapped address if necessary by scanning for the first map element that has a higher
628  * virtual address and is executable. */
630  if (si==map->segments().end())
631  return; // no subsequent valid mapped address
632  const Extent &range = si->first;
633  const MemoryMap::Segment &segment = si->second;
634  assert(range.last()>=next_va);
635 
636  if (0==(segment.get_mapperms() & MemoryMap::MM_PROT_EXEC)) {
637  next_va = range.last() + 1;
638  continue;
639  }
640 
641  next_va = std::max(next_va, range.first());
642 
643  /* If we tried to disassemble at this address and failed, then try the next address. */
644  if (tried.exists(next_va)) {
645  next_va++;
646  continue; /*tail recursion*/
647  }
648 
649  if (avoid_overlap) {
650  /* Are there any instructions that overlap with this address? */
651  SgAsmInstruction *overlap = find_instruction_containing(insns, next_va);
652  if (overlap) {
653  next_va = overlap->get_address() + overlap->get_size() + 1;
654  continue; /*tail recursion*/
655  }
656  } else if (insns.find(next_va)!=insns.end()) {
657  /* There is an instruction starting at this address */
658  next_va++;
659  continue; /*tail recursion*/
660  }
661 
662  if (p_debug)
663  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: SEARCH_%s added 0x%08"PRIx64"\n",
664  start_va, avoid_overlap?"UNUSED":"ALLBYTES", next_va);
665 
666  worklist->insert(next_va);
667  return;
668  }
669 }
670 
671 void
673 {
674  struct T: public AstSimpleProcessing {
675  T(AddressSet *wl, const MemoryMap *map, FILE *f)
676  : worklist(wl), map(map), p_debug(f) {}
677  void visit(SgNode *node) {
679  if (symbol && symbol->get_type()==SgAsmGenericSymbol::SYM_FUNC) {
680  SgAsmGenericSection *section = symbol->get_bound();
681  if (section && (section->is_mapped() || section->get_contains_code())) {
682  rose_addr_t va = section->get_mapped_actual_va();
683  if (map->exists(va)) {
684  if (p_debug)
685  fprintf(p_debug, "Disassembler: SEARCH_FUNCSYMS added 0x%08"PRIx64" for \"%s\"\n",
686  va, symbol->get_name()->get_string(true).c_str());
687  worklist->insert(va);
688  }
689  }
690  }
691  }
692  AddressSet *worklist;
693  const MemoryMap *map;
694  FILE *p_debug;
695  } t(worklist, map, p_debug);
696  t.traverse(header, preorder);
697 }
698 
701 {
/* Searches INSNS for an instruction whose byte range covers address VA and returns it, or returns NULL when none is
 * found.  The search starts at the last entry whose key is not greater than VA and walks toward lower keys. */
/* Assumed upper bound on instruction size; it limits how far back the search walks. */
702  const size_t max_insns_size = 16;
/* upper_bound() yields the first entry keyed strictly above VA, so candidates are the entries before it. */
703  InstructionMap::const_iterator ii=insns.upper_bound(va);
704  if (ii==insns.begin())
705  return NULL;
706  while (1) {
707  --ii;
708  ROSE_ASSERT(ii->first <= va);
/* An entry keyed more than max_insns_size below VA ends the search; earlier entries are not examined. */
709  if (ii->first + max_insns_size < va)
710  return NULL;
/* Key plus instruction size extends past VA: this instruction contains VA. */
711  if (ii->first + ii->second->get_size() > va)
712  return ii->second;
/* Reached the first entry without a match. */
713  if (ii==insns.begin())
714  return NULL;
715  }
716 }
717 
718 /* Disassemble reachable instructions from a buffer */
720 Disassembler::disassembleBuffer(const unsigned char *buf, rose_addr_t buf_va, size_t buf_size, rose_addr_t start_va,
721  AddressSet *successors, BadMap *bad)
722 {
723  MemoryMap map;
724  map.insert(Extent(buf_va, buf_size),
726  MemoryMap::MM_PROT_RX, "disassembleBuffer temp"));
727  return disassembleBuffer(&map, start_va, successors, bad);
728 }
729 
730 /* Disassemble instructions in a single section. */
733  AddressSet *successors, BadMap *bad)
734 {
735  SgAsmGenericFile *file = section->get_file();
736  ROSE_ASSERT(file!=NULL);
737  const void *file_buf = &(file->get_data()[0]);
738 
739  MemoryMap::Segment sgmt(MemoryMap::ExternBuffer::create(file_buf, section->get_size()), 0,
740  MemoryMap::MM_PROT_RX, section->get_name()->get_string());
741  MemoryMap map;
742  map.insert(Extent(section_va, section->get_size()), sgmt);
743  return disassembleBuffer(&map, section_va+start_offset, successors, bad);
744 }
745 
746 /* Disassemble instructions for an interpretation (set of headers) */
749 {
750  const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
751  AddressSet worklist;
752 
753  /* Use the register dictionary attached to the interpretation, if any. */
754  if (interp->get_registers())
755  set_registers(interp->get_registers());
756 
757  /* Use the memory map attached to the interpretation, or build a new one and attach it. */
758  MemoryMap *map = interp->get_map();
759  if (!map) {
760  if (p_debug)
761  fprintf(p_debug, "Disassembler: no memory map; remapping all sections\n");
762  BinaryLoader *loader = BinaryLoader::lookup(interp);
763  assert(loader);
764  loader = loader->clone();
765  assert(loader);
766  loader->set_perform_dynamic_linking(false);
767  loader->set_perform_remap(true);
768  loader->set_perform_relocations(false);
769  loader->load(interp);
770  map = interp->get_map();
771  }
772  ROSE_ASSERT(map);
773  if (p_debug) {
774  fprintf(p_debug, "Disassembler: MemoryMap for disassembly:\n");
775  map->dump(p_debug, " ");
776  }
777 
778  /* Seed disassembly with entry points and function symbols from each header. */
779  for (size_t i=0; i<headers.size(); i++) {
780  SgRVAList entry_rvalist = headers[i]->get_entry_rvas();
781  for (size_t j=0; j<entry_rvalist.size(); j++) {
782  rose_addr_t entry_va = entry_rvalist[j].get_rva() + headers[i]->get_base_va();
783  worklist.insert(entry_va);
784  if (p_debug)
785  fprintf(p_debug, "Disassembler[va 0x%08"PRIx64"]: entry point\n", entry_va);
786  }
788  search_function_symbols(&worklist, map, headers[i]);
789  }
790 
791  /* Do not require execute permission if the user wants to disassemble everything. */
792  /* FIXME: Not thread safe! [RPM 2011-01-27] */
793  unsigned orig_protections = get_protection();
794  if (p_search & SEARCH_NONEXE)
795  set_protection(orig_protections & ~MemoryMap::MM_PROT_EXEC);
796 
797  /* Disassemble all that we've mapped, according to aggressiveness settings. */
798  InstructionMap retval;
799  try {
800  retval = disassembleBuffer(map, worklist, successors, bad);
801  } catch (...) {
802  set_protection(orig_protections);
803  throw;
804  }
805 
806 #if 0
807  /* Mark the parts of the file corresponding to the instructions as having been referenced, since this is part of parsing.
808  *
809  * NOTE: I turned this off because it's slow if there's a lot of instructions (e.g., about 20s/million instructions on my
810  * machine). If the user really needs to know this information they can probably calculate it using an ExtentMap and
811  * traversing the instructions in the final AST. Another problem is that since the disassembler runs before the
812  * partitioner, and the partitioner might throw away unused instructions, calculating the references here in the
813  * disassembler is not accurate. [RPM 2010-04-30]
814  *
815  * NOTE: Since mark_referenced_instructions() is not thread safe, its inclusion here would cause this method to be not
816  * thread safe also. [RPM 2011-01-27] */
817  mark_referenced_instructions(interp, map, retval);
818 #endif
819 
820  return retval;
821 }
822 
823 /* Re-read instruction bytes from file if necessary in order to mark them as referenced. */
824 void
826 {
827  unsigned char buf[32];
828  SgAsmGenericFile *file = NULL;
829  const SgAsmGenericFilePtrList &files = interp->get_files();
830  bool was_tracking = false; // only valid when file!=NULL (value here is to shut of used-before-defined warnings from GCC)
832 
833  /* Re-read each instruction so the file has a chance to track the reference. */
834  try {
835  for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
836  SgAsmInstruction *insn = ii->second;
837  ROSE_ASSERT(insn->get_size()<=sizeof buf);
838  rose_addr_t va = insn->get_address();
839  size_t nbytes = insn->get_size();
840 
841  while (nbytes>0) {
842  /* Find the memory map segment and the file that goes with that segment (if any) */
843  if (si==map->segments().end() || !si->first.contains(Extent(va))) {
844  if (file) {
845  file->set_tracking_references(was_tracking);
846  file = NULL;
847  }
848  si = map->segments().find(va);
849  if (si==map->segments().end()) {
850  /* This byte of the instruction is not mapped. Perhaps the next one is. */
851  ++va;
852  --nbytes;
853  continue;
854  }
855 
856  /* Find the file that goes with this segment. */
857  for (size_t i=0; i<files.size(); i++) {
858  if (&(files[i]->get_data()[0]) == si->second.get_buffer()->get_data_ptr()) {
859  file = files[i];
860  was_tracking = file->get_tracking_references();
861  file->set_tracking_references(true);
862  break;
863  }
864  }
865 
866  }
867 
868  /* Read the file for its reference tracking side effect. */
869  size_t sgmt_offset = va - si->first.first();
870  size_t n = std::min(nbytes, (size_t)si->first.size()-sgmt_offset);
871  if (file) {
872  size_t file_offset = si->second.get_buffer_offset() + sgmt_offset;
873  file->read_content(file_offset, buf, n, false);
874  }
875  nbytes -= n;
876  va += n;
877  }
878  }
879  if (file)
880  file->set_tracking_references(was_tracking);
881  } catch(...) {
882  if (file)
883  file->set_tracking_references(was_tracking);
884  throw;
885  }
886 }
887 
888 /* Compute the successor addresses for the instructions in INSNS, treated as one block.  The result is whatever the
 * first instruction's get_successors() reports for the whole block (COMPLETE is passed through to that call), plus
 * the return address when the last instruction is reported to be a function call. */
891 {
/* Copy the instructions, in map iteration order, into the vector form used by the SgAsmInstruction calls below. */
892  std::vector<SgAsmInstruction*> block;
893  for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii)
894  block.push_back(ii->second);
/* NOTE(review): front() here and back() below require a non-empty vector; this looks like it relies on callers never
 * passing an empty INSNS -- confirm. */
895  Disassembler::AddressSet successors = block.front()->get_successors(block, complete);
896 
897  /* For the purposes of disassembly, assume that a CALL instruction eventually executes a RET that causes execution to
898  * resume at the address following the CALL. This is true 99% of the time. Higher software layers (e.g., Partitioner) may
899  * make other assumptions, which is why this code is not in SgAsmx86Instruction::get_successors(). [RPM 2010-05-09] */
/* TARGET is filled in by is_function_call() but is not used afterwards; only RETURN_VA is added to the result. */
900  rose_addr_t target, return_va;
901  SgAsmInstruction *last_insn = block.back();
902  if (last_insn->is_function_call(block, &target, &return_va))
903  successors.insert(return_va);
904 
905  return successors;
906 }
906 }