8 #include "AsmUnparser_compat.h"
10 #include "PartialSymbolicSemantics.h"
11 #include "stringify.h"
13 #include "PartialSymbolicSemantics2.h"
14 #include "DispatcherX86.h"
69 progress_interval = min_interval;
76 time_t now =
time(NULL);
81 if (progress_file!=NULL && now-progress_time >= progress_interval) {
85 vfprintf(progress_file, fmt, ap);
92 vfprintf(debug, fmt, ap);
102 while (at<s.size()) {
103 enum { SET_BIT, CLEAR_BIT, SET_VALUE, NOT_SPECIFIED } howset = NOT_SPECIFIED;
108 }
else if (s[at]==
'+') {
111 }
else if (s[at]==
'=') {
116 throw Exception(
"heuristic name must follow qualifier");
118 size_t comma = s.find(
",", at);
119 std::string word = std::string(s, at, comma-at);
121 throw Exception(
"heuristic name must follow comma");
124 if (word==
"entry" || word==
"entry_point") {
126 }
else if (word==
"call_target") {
128 }
else if (word==
"call_insn") {
130 }
else if (word==
"call") {
132 }
else if (word==
"eh" || word==
"eh_frame") {
134 }
else if (word==
"import") {
136 }
else if (word==
"export") {
138 }
else if (word==
"symbol") {
140 }
else if (word==
"pattern") {
142 }
else if (word==
"userdef") {
144 }
else if (word==
"pad" || word==
"padding" || word==
"interpad") {
146 }
else if (word==
"intrablock") {
148 }
else if (word==
"thunk") {
150 }
else if (word==
"misc" || word==
"miscellaneous" || word==
"interpadfunc") {
152 }
else if (word==
"unassigned" || word==
"unclassified" || word==
"leftover" || word==
"leftovers") {
154 }
else if (word==
"default") {
156 if (howset==NOT_SPECIFIED) howset = SET_VALUE;
157 }
else if (isdigit(word[0])) {
158 bits = strtol(word.c_str(), NULL, 0);
160 throw Exception(
"unknown partitioner heuristic: \"" + word +
"\"");
175 at = comma==std::string::npos ? s.size() : comma+1;
187 this->ro_map = *ro_map;
193 this->ro_map.
clear();
201 using namespace BinaryAnalysis::InstructionSemantics2;
// Take this branch unless the instruction is an x86 jmp or farjmp carrying exactly one operand.
// Both kind comparisons must be "!=": with "==" on farjmp the test collapsed to "kind is farjmp",
// which let every non-jump instruction through to the operand lookup that follows.
205 if (!insn_x86 || (insn_x86->get_kind()!=
x86_jmp && insn_x86->get_kind()!=
x86_farjmp) ||
206 1!=insn_x86->get_operandList()->get_operands().size())
208 SgAsmExpression *target_expr = insn_x86->get_operandList()->get_operands()[0];
217 PartialSymbolicSemantics::RiscOperatorsPtr ops = PartialSymbolicSemantics::RiscOperators::instance(regdict);
218 BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(ops);
219 ops->set_memory_map(&ro_map);
221 for (
size_t i=0; i<bb->
insns.size(); ++i) {
224 dispatcher->processInstruction(insn_x86);
236 BaseSemantics::SValuePtr eip = ops->readRegister(*REG_EIP);
237 static const size_t entry_size = 4;
238 uint8_t *buf =
new uint8_t[entry_size];
239 if (!eip->is_number()) {
240 BaseSemantics::MemoryCellListPtr mem = BaseSemantics::MemoryCellList::promote(ops->get_state()->get_memory_state());
241 for (BaseSemantics::MemoryCellList::CellList::iterator mi=mem->get_cells().begin(); mi!=mem->get_cells().end(); ++mi) {
242 BaseSemantics::MemoryCellPtr cell = *mi;
243 if (cell->get_address()->is_number() && cell->get_value()->must_equal(eip)) {
244 rose_addr_t base_va = cell->get_address()->get_number();
247 size_t nread = ro_map.read(buf, base_va+nentries*entry_size, entry_size);
248 if (nread!=entry_size)
// Assemble the table entry from its bytes, least-significant byte first. Each byte is widened
// before shifting: a bare uint8_t is promoted to int, so shifting a byte >= 0x80 left by 24 would
// overflow a signed int and sign-extend into the upper half of the address.
251 for (
size_t i=0; i<entry_size; i++)
252 target_va |= static_cast<rose_addr_t>(buf[i]) << (i*8);
255 successors.insert(target_va);
260 table_extent->
insert(
Extent(base_va, nentries*entry_size));
262 DataBlock *dblock = find_db_starting(base_va, nentries*entry_size);
266 fprintf(debug,
"[jump table at 0x%08"PRIx64
"+%zu*%zu]", base_va, nentries, entry_size);
281 assert(bb!=NULL && !bb->
insns.empty());
285 std::vector<SgAsmInstruction*> inodes;
286 for (InstructionVector::const_iterator ii=bb->
insns.begin(); ii!=bb->
insns.end(); ++ii)
298 if (!table_entries.empty()) {
299 bb->
cache.
sucs.insert(table_entries.begin(), table_entries.end());
301 std::ostringstream ss;
302 ss <<
"[jump table at " <<table_extent <<
"]";
303 fprintf(debug,
"%s", ss.str().c_str());
314 bool looks_like_call = bb->
insns.front()->node->is_function_call(inodes, &target_va, NULL);
315 if (looks_like_call && target_va!=fallthrough_va) {
325 bb->
insns.front()->node->is_function_return(inodes);
360 for (Disassembler::AddressSet::const_iterator si=bb->
cache.
sucs.begin(); si!=bb->
cache.
sucs.end(); ++si)
361 retval.insert(canonic_block(*si));
372 if (call_target_va!=NO_TARGET) {
373 Instruction *target_insn = find_instruction(call_target_va,
true);
374 BasicBlock *target_bb = target_insn ? find_bb_starting(call_target_va,
false) : NULL;
378 retval.insert(fall_through_va);
379 }
else if (target_bb && target_bb->
function) {
383 retval.insert(fall_through_va);
384 }
else if (target_bb) {
387 retval.insert(fall_through_va);
393 retval.insert(fall_through_va);
396 retval.insert(fall_through_va);
424 using namespace BinaryAnalysis::InstructionSemantics;
426 bool on_stack =
true;
429 BasicBlock *bb = find_bb_containing(va,
false);
430 bool preexisting = bb!=NULL;
431 if (!bb) bb = find_bb_containing(va);
432 if (!bb)
return false;
437 typedef PartialSymbolicSemantics::Policy<> Policy;
438 typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;
440 policy.set_map(get_map());
441 PartialSymbolicSemantics::ValueType<32> orig_retaddr;
442 policy.writeMemory(
x86_segreg_ss, policy.readRegister<32>(
"esp"), orig_retaddr, policy.true_());
443 Semantics semantics(policy);
446 fputs(
"Partitioner::pops_return_address:\n", stderr);
449 for (InstructionVector::iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii) {
451 if (!insn)
return false;
453 semantics.processInstruction(insn);
455 std::ostringstream s;
456 s <<
"Analysis for " <<unparseInstructionWithAddress(insn) <<std::endl
458 fputs(s.str().c_str(), stderr);
461 on_stack = policy.on_stack(orig_retaddr);
462 if (!on_stack && debug)
463 fprintf(debug,
"[B%08"PRIx64
"#%zu discards return address]", va, bb->insns.size());
464 }
catch (
const Semantics::Exception&) {
466 }
catch (
const Policy::Exception&) {
489 assert(!insns.empty());
490 return insns.front()->get_address();
498 assert(!nodes.empty());
499 return nodes.front()->get_address();
519 assert(insns.size()>0);
527 for (std::set<DataBlock*>::iterator di=data_blocks.begin(); di!=data_blocks.end(); ++di)
528 (*di)->basic_block = NULL;
536 for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi)
537 bi->second->function = NULL;
538 basic_blocks.clear();
545 for (DataBlocks::iterator bi=data_blocks.begin(); bi!=data_blocks.end(); ++bi)
546 bi->second->function = NULL;
555 basic_blocks[bbi->first] = bbi->second;
556 bbi->second->function =
this;
560 heads.insert(other->
heads.begin(), other->
heads.end());
561 other->
heads.clear();
569 data_blocks[dbi->first] = dbi->second;
570 dbi->second->function =
this;
578 switch (get_may_return()) {
580 set_may_return(new_value);
584 set_may_return(new_value);
597 for (
size_t i=0; i<may_return_str.size(); ++i)
598 may_return_str[i] = tolower(may_return_str[i]);
599 fprintf(debug,
"{nbblocks=%zu, ndblocks=%zu, may-return=%s}",
600 basic_blocks.size(), data_blocks.size(), may_return_str.c_str());
607 BasicBlocks::const_iterator bi=basic_blocks.find(entry_va);
608 return bi==basic_blocks.end() ? NULL : bi->second;
616 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
617 fi->second->clear_basic_blocks();
618 fi->second->clear_data_blocks();
625 for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi)
627 basic_blocks.clear();
630 for (DataBlocks::iterator bi=data_blocks.begin(); bi!=data_blocks.end(); ++bi)
635 for (BlockConfigMap::iterator bci=block_config.begin(); bci!=block_config.end(); ++bci)
637 block_config.clear();
641 for (InstructionMap::iterator ii=insns.begin(); ii!=insns.end(); ++ii)
646 clear_disassembler_errors();
650 delete aggregate_mean; aggregate_mean = NULL;
651 delete aggregate_variance; aggregate_variance = NULL;
652 code_criteria = NULL;
657 if (filename.empty())
662 int fd = open(filename.c_str(), O_RDONLY);
667 char *config =
new char[sb.st_size];
668 ssize_t nread = read(fd, config, sb.st_size);
669 if (nread<0 || nread<sb.st_size) {
691 assert(bb==find_bb_containing(va));
694 InstructionVector::iterator cut = bb->
insns.begin();
695 while (cut!=bb->
insns.end() && (*cut)->get_address()!=va) ++cut;
696 assert(cut!=bb->
insns.begin());
699 for (InstructionVector::iterator ii=cut; ii!=bb->
insns.end(); ++ii) {
704 if (cut!=bb->
insns.end()) {
716 assert(NULL==insn->
bblock);
718 bb->
insns.push_back(insn);
857 BasicBlock *bb = find_bb_containing(va,
false);
859 if (discard_entire_block) {
861 }
else if (bb->
insns.front()==insn) {
880 for (InstructionVector::iterator ii=bb->
insns.begin(); ii!=bb->
insns.end(); ++ii) {
890 basic_blocks.erase(bb->
address());
900 InstructionMap::iterator ii = insns.find(va);
901 if (create && disassembler && ii==insns.end() && bad_insns.find(va)==bad_insns.end()) {
904 insn =
new Instruction(disassembler->disassembleOne(map, va, NULL));
905 ii = insns.insert(std::make_pair(va, insn)).first;
907 bad_insns.insert(std::make_pair(va, e));
910 return ii==insns.end() ? NULL : ii->second;
938 if (!create || insn->
bblock!=NULL)
943 basic_blocks.insert(std::make_pair(va, bb));
959 }
else if (allow_discont_blocks) {
960 if (!complete || sucs.size()!=1)
962 va = *(sucs.begin());
964 if (!complete || sucs.size()!=1 || *(sucs.begin())!=va)
970 insn = find_instruction(va);
971 if (!insn || insn->
bblock)
983 BasicBlock *bb = find_bb_containing(va, create);
991 fprintf(debug,
"[split from B%08"PRIx64
"#%zu]", bb->
address(), bb->
insns.size());
995 bb = find_bb_containing(va);
1007 for (
size_t i=0; i<100; i++) {
1008 BasicBlock *bb = find_bb_starting(va,
false);
1010 if (debug) fprintf(debug,
"[B%08"PRIx64
"->B%08"PRIx64
"]", va, bb->
cache.
alias_for);
1013 assert(!
"possible alias loop");
1021 Functions::iterator fi = functions.find(entry_va);
1022 if (fi==functions.end())
return NULL;
1031 Functions::iterator fi = functions.find(entry_va);
1032 if (fi==functions.end()) {
1033 f =
new Function(entry_va, reasons, name);
1034 functions[entry_va] = f;
1050 using namespace BinaryAnalysis::InstructionSemantics;
1052 for (BlockConfigMap::iterator bci=block_config.begin(); bci!=block_config.end(); ++bci) {
1064 truncate(bb, bb->
insns[bconf->
ninsns]->get_address());
1067 update_analyses(bb);
1078 char block_name_str[64];
1079 sprintf(block_name_str,
"B%08"PRIx64, va);
1080 std::string block_name = block_name_str;
1081 if (debug) fprintf(stderr,
"running successors program for %s\n", block_name_str);
1087 typedef PartialSymbolicSemantics::Policy<> Policy;
1088 typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;
1090 policy.set_map(map);
1091 Semantics semantics(policy);
1093 if (debug) fprintf(stderr,
" running semantics for the basic block...\n");
1094 for (InstructionVector::iterator ii=bb->
insns.begin(); ii!=bb->
insns.end(); ++ii) {
1097 semantics.processInstruction(insn);
1102 if (debug) fprintf(stderr,
" loading the program...\n");
1111 static const size_t stack_size = 8192;
1119 static const size_t svec_size = 8192;
1131 fprintf(stderr,
" memory map after program is loaded:\n");
1132 map->
dump(stderr,
" ");
1136 if (debug) fprintf(stderr,
" setting up the call frame...\n");
1140 policy.writeMemory<32>(
x86_segreg_ss, policy.number<32>(stack_ptr),
1141 policy.readRegister<32>(
"esp"), policy.true_());
1145 policy.writeMemory<32>(
x86_segreg_ss, policy.number<32>(stack_ptr),
1146 policy.number<32>(bb->
insns.back()->get_address()+bb->
insns.back()->get_size()),
1151 policy.writeMemory<32>(
x86_segreg_ss, policy.number<32>(stack_ptr),
1152 policy.number<32>(bb->
insns.front()->get_address()), policy.true_());
1156 policy.writeMemory<32>(
x86_segreg_ss, policy.number<32>(stack_ptr),
1157 policy.number<32>(svec_size), policy.true_());
1161 policy.writeMemory<32>(
x86_segreg_ss, policy.number<32>(stack_ptr),
1162 policy.number<32>(svec_va), policy.true_());
1166 policy.writeMemory<32>(
x86_segreg_ss, policy.number<32>(stack_ptr),
1167 policy.number<32>(return_va), policy.true_());
1170 policy.writeRegister(
"esp", policy.number<32>(stack_ptr));
1173 if (debug) fprintf(stderr,
" running the program...\n");
1175 assert(disassembler!=NULL);
1176 policy.writeRegister(
"eip", policy.number<32>(text_va));
1178 rose_addr_t ip = policy.readRegister<32>(
"eip").known_value();
1179 if (ip==return_va)
break;
1181 if (debug) fprintf(stderr,
" 0x%08"PRIx64
": %s\n", ip, insn?unparseInstruction(insn).c_str():
"<null>");
1183 semantics.processInstruction(insn);
1184 assert(policy.readRegister<32>(
"eip").is_known());
1189 if (debug) fprintf(stderr,
" extracting program return values...\n");
1190 PartialSymbolicSemantics::ValueType<32> nsucs = policy.readMemory<32>(
x86_segreg_ss, policy.number<32>(svec_va),
1192 assert(nsucs.is_known());
1193 if (debug) fprintf(stderr,
" number of successors: %"PRId64
"\n", nsucs.known_value());
1194 assert(nsucs.known_value()*4 <= svec_size-4);
1195 for (
size_t i=0; i<nsucs.known_value(); i++) {
1196 PartialSymbolicSemantics::ValueType<32> suc_va = policy.readMemory<32>(
x86_segreg_ss,
1197 policy.number<32>(svec_va+4+i*4),
1199 if (suc_va.is_known()) {
1200 if (debug) fprintf(stderr,
" #%zu: 0x%08"PRIx64
"\n", i, suc_va.known_value());
1201 bb->
cache.
sucs.insert(suc_va.known_value());
1203 if (debug) fprintf(stderr,
" #%zu: unknown\n", i);
1209 if (debug) fprintf(stderr,
" unmapping the program...\n");
1210 map->
erase(text_sgmt);
1211 map->
erase(stack_sgmt);
1212 map->
erase(svec_sgmt);
1214 if (debug) fprintf(stderr,
" done.\n");
1227 if ((entries.empty() || (1==entries.size() && 0==entries[0].get_rva())) &&
1232 for (
size_t i=0; i<entries.size(); i++) {
1234 if (find_instruction(entry_va))
1244 for (
size_t i=0; i<sections->
get_sections().size(); i++) {
1246 if (ehframe!=NULL) {
1248 for (
size_t j=0; j<ci_entries->
get_entries().size(); j++) {
1251 for (
size_t k=0; k<fd_entries->
get_entries().size(); k++) {
1254 if (find_instruction(target))
1274 if (!gotplt || !gotplt->
is_mapped())
return;
1277 std::set<SgAsmElfRelocSection*> rsects;
1279 for (SgAsmGenericSectionPtrList::const_iterator si=sections.begin(); si!=sections.end(); ++si) {
1282 rsects.insert(reloc_section);
1284 if (rsects.empty())
return;
1289 while (plt_offset<plt->get_mapped_size()) {
1299 if (!insn_x86)
continue;
1309 for (std::set<SgAsmElfRelocSection*>::iterator ri=rsects.begin(); ri!=rsects.end() && name.empty(); ++ri) {
1313 for (
size_t ei=0; ei<entries->
get_entries().size() && name.empty() && symbols; ++ei) {
1316 unsigned long symbol_idx = rel->
get_sym();
1317 if (symbol_idx < symbols->get_symbols().size()) {
1332 if (
"abort@plt"!=name &&
"execl@plt"!=name &&
"execlp@plt"!=name &&
"execv@plt"!=name &&
"execvp@plt"!=name &&
1333 "exit@plt"!=name &&
"_exit@plt"!=name &&
"fexecve@plt"!=name &&
1334 "longjmp@plt"!=name &&
"__longjmp@plt"!=name &&
"siglongjmp@plt"!=name) {
1347 for (
size_t i=0; i<sections->
get_sections().size(); i++) {
1350 std::vector<SgAsmGenericSymbol*> symbols;
1353 for (
size_t j=0; j<elf_symbols->
get_symbols().size(); j++) {
1358 for (
size_t j=0; j<coff_symbols->
get_symbols().size(); j++) {
1359 symbols.push_back(coff_symbols->
get_symbols()[j]);
1363 for (
size_t j=0; j<symbols.size(); j++) {
1374 if (section!=NULL && section->
is_mapped() &&
1378 if (find_instruction(va_1))
1385 if (find_instruction(value))
1397 for (
size_t i=0; i<sections->
get_sections().size(); ++i) {
1400 for (SgAsmPEExportEntryPtrList::const_iterator ei=exports.begin(); ei!=exports.end(); ++ei) {
1401 rose_addr_t va = (*ei)->get_export_rva().get_va();
1402 if (find_instruction(va))
1410 Partitioner::InstructionMap::const_iterator
1413 InstructionMap::const_iterator ii = first;
1422 if (opands.size()!=2)
1434 matches.insert(ii->first);
1435 ii = insns.find(ii->first + insn->
get_size());
1440 if (ii==insns.end())
1446 if (opands.size()!=1)
1453 matches.insert(ii->first);
1454 ii = insns.find(ii->first + insn->
get_size());
1459 if (ii==insns.end())
1465 if (opands.size()!=2)
1477 matches.insert(ii->first);
1480 exclude.insert(matches.begin(), matches.end());
1486 Partitioner::InstructionMap::const_iterator
1489 InstructionMap::const_iterator ii = first;
1493 for (
size_t i=0; i<3; i++) {
1495 if (!nop)
return insns.end();
1498 matches.insert(ii->first);
1499 ii = insns.find(ii->first + nop->
get_size());
1500 if (ii==insns.end())
return insns.end();
1505 if (!notnop)
return insns.end();
1507 matches.insert(ii->first);
1509 exclude.insert(matches.begin(), matches.end());
1516 Partitioner::InstructionMap::const_iterator
1519 InstructionMap::const_iterator ii = first;
1523 for (
size_t i=0; i<3; i++) {
1525 if (!insn)
return insns.end();
1530 matches.insert(ii->first);
1531 ii = insns.find(ii->first + insn->
get_size());
1532 if (ii==insns.end())
return insns.end();
1538 if (!insn)
return insns.end();
1540 matches.insert(ii->first);
1541 ii = insns.find(ii->first + insn->
get_size());
1542 if (ii==insns.end())
return insns.end();
1547 if (!insn)
return insns.end();
1548 matches.insert(ii->first);
1550 exclude.insert(matches.begin(), matches.end());
1566 virtual bool operator()(
bool enabled,
const Args &args) {
1567 assert(args.restrict_map!=NULL);
1569 static const size_t patternsz=16;
1570 assert(patternsz<
sizeof buf);
1573 while (va<=args.range.last()) {
1574 size_t nbytes = std::min(args.range.last()+1-va, (
rose_addr_t)
sizeof buf);
1575 size_t nread = args.restrict_map->read(buf, va, nbytes);
1576 for (
size_t i=0; i<nread; ++i) {
1578 if (i+3<nread && 0x55==buf[i+0] && 0x8b==buf[i+1] && 0xec==buf[i+2]) {
1591 scan_unassigned_bytes(&t1, mm);
1596 InstructionMap::const_iterator found;
1598 for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
1599 if (exclude.find(ii->first)==exclude.end() && (found=pattern1(insns, ii, exclude))!=insns.end())
1603 for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
1604 if (exclude.find(ii->first)==exclude.end() && (found=pattern2(insns, ii, exclude))!=insns.end())
1609 for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
1610 if (exclude.find(ii->first)==exclude.end() && (found=pattern3(insns, ii, exclude))!=insns.end())
1622 for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
1623 std::vector<SgAsmInstruction*> iv;
1624 iv.push_back(ii->second->node);
1626 if (ii->second->node->is_function_call(iv, &target_va, NULL) && target_va!=NO_TARGET &&
1627 target_va!=ii->first + ii->second->get_size()) {
1638 while (!insns.empty()) {
1641 InstructionMap::iterator ii = insns.find(va);
1643 while (ii!=insns.end()) {
1644 contig.push_back(ii->second);
1645 va += ii->second->get_size();
1646 ii = insns.find(va);
1649 for (
size_t i=0; i<contig.size(); i++)
1650 insns.erase(contig[i]->get_address());
1667 for (InstructionMap::iterator ai=all.begin(); ai!=all.end(); ++ai) {
1668 BasicBlock *bb = find_bb_containing(ai->first,
false);
1671 if (!range.empty()) {
1672 scan_contiguous_insns(range, cblist, prev, ai->second);
1681 scan_contiguous_insns(range, cblist, prev, NULL);
1693 virtual bool operator()(
bool enabled,
const Args &args) {
1695 if (!args.insn_prev || !args.insn_end)
1697 BasicBlock *bb_lt = args.partitioner->find_bb_containing(args.insn_prev->get_address(),
false);
1698 BasicBlock *bb_rt = args.partitioner->find_bb_containing(args.insn_end->get_address(),
false);
1708 scan_unassigned_insns(cblist2);
1720 virtual bool operator()(
bool enabled,
const Args &args) {
1722 if (!args.insn_prev || !args.insn_end)
1724 BasicBlock *bb_lt = args.partitioner->find_bb_containing(args.insn_prev->get_address(),
false);
1725 BasicBlock *bb_rt = args.partitioner->find_bb_containing(args.insn_end->get_address(),
false);
1735 scan_unassigned_insns(cblist2);
1746 function_extent(&assigned);
1766 virtual bool operator()(
bool enabled,
const Args &args) {
1773 if (prev==args.ranges.end())
1778 if (next==args.ranges.end())
1782 enabled = prev->second.get()==next->second.get();
1789 scan_unassigned_bytes(cblist2, restrict_map);
1799 virtual bool operator()(
bool enabled,
const Args &args) {
1806 if (prev==args.ranges.end())
1811 if (next==args.ranges.end())
1815 enabled = prev->second.get()!=next->second.get();
1822 scan_unassigned_bytes(cblist2, restrict_map);
1834 if (patterns.empty())
1836 size_t max_psize = patterns[0].
size();
1837 for (
size_t pi=1; pi<patterns.size(); ++pi)
1838 max_psize =
std::max(max_psize, patterns[pi].size());
1848 Function *func = prev->second.get();
1853 if (ends_contiguously) {
1854 if (max_psize*maximum_nrep < range.
size())
1866 if (range.
size() > maximum_range_size)
1870 if (ends_contiguously && buf.size()<range.
size())
1872 range.
resize(buf.size());
1877 while (!range.
empty()) {
1880 for (
size_t pi=0; pi<patterns.size(); ++pi) {
1881 size_t psize = patterns[pi].size();
1885 offset+psize<=buf.size() && nrep<maximum_nrep;
1887 if (memcmp(&buf[
offset], &patterns[pi][0], psize))
1890 if (nrep>0 && nrep>=minimum_nrep && (!ends_contiguously || nrep*psize==range.
size())) {
1893 assert(dblock!=NULL);
1899 fprintf(p->
debug,
"Partitioner::FindDataPadding for F%08"PRIx64
": added", func->
entry_va);
1900 fprintf(p->
debug,
" D%08"PRIx64, range.
first());
1909 if (p->
debug && nblocks>0)
1910 fprintf(p->
debug,
"\n");
1928 Function *func = prev->second.get();
1932 if (0!=(func->
reason & excluded_reasons))
1936 if (NULL==padding_ranges) {
1942 if (padding_ranges->find(args.
range.
first()-1)!=padding_ranges->end())
1947 assert(dblock!=NULL);
1951 fprintf(p->
debug,
"Partitioner::FindData: for F%08"PRIx64
": added D%08"PRIx64
"\n",
1969 if (begins_contiguously &&
1979 assert(last_block!=NULL);
1988 for (
size_t i=0; i<args.
ninsns && insn!=NULL; i++) {
1991 bool matches =
false;
1998 if (!matches && insn_x86) {
1999 if (x86_kinds.find(insn_x86->
get_kind())!=x86_kinds.end())
2003 for (
size_t j=0; !matches && j<byte_patterns.size(); j++) {
2013 padding.push_back(insn);
2017 if ((matches && insn) || padding.empty())
2019 if (begins_contiguously &&
2022 if (ends_contiguously) {
2027 if (padding.back()->get_address()+padding.back()->get_size() != args.
insn_end->
get_address()) {
2031 }
else if (i+1<args.
ninsns) {
2038 if (padding.back()->get_address()+padding.back()->get_size() - padding.front()->get_address() < minimum_size) {
2049 assert(!padding.empty());
2051 assert(prev_func!=NULL);
2052 rose_addr_t begin_va = padding.front()->get_address();
2053 rose_addr_t end_va = padding.back()->get_address() + padding.back()->get_size();
2054 assert(end_va>begin_va);
2055 size_t size = end_va - begin_va;
2057 assert(dblock!=NULL);
2059 for (
size_t i=0; i<padding.size(); i++)
2062 fprintf(p->
debug,
"Partitioner::FindInsnPadding: for F%08"PRIx64
": added D%08"PRIx64
"\n",
2069 fprintf(p->
debug,
"Partitioner::FindInsnPadding: for F%08"PRIx64
": added", new_func->
entry_va);
2070 for (
size_t i=0; i<padding.size(); i++) {
2079 fprintf(p->
debug,
"\n");
2082 retval = padding.size()!=args.
ninsns;
2097 DataBlocks::iterator dbi = data_blocks.find(start_va);
2098 if (dbi!=data_blocks.end()) {
2118 data_blocks[start_va] = db;
2122 assert(raw_bytes.size()==size);
2125 datum->set_raw_bytes(raw_bytes);
2126 db->
nodes.push_back(datum);
2139 if (!function_extents) {
2145 if (!code_criteria) {
2155 if (prev==function_extents->end())
2157 Function *func = prev->second.get();
2158 if (0!=(func->
reason & excluded_reasons))
2163 if (require_intrafunction) {
2167 if (next==function_extents->end() || next->second.get()!=func)
2174 if (require_noninterleaved && !p->
is_contiguous(func,
false))
2182 if (!code_criteria->satisfied_by(stats, &raw_vote))
2188 std::set<BasicBlock*> bblocks;
2189 while (!pending.
empty()) {
2194 if (bblocks.find(bb)==bblocks.end()) {
2196 for (InstructionVector::iterator ii=bb->
insns.begin(); ii!=bb->
insns.end(); ++ii) {
2197 Extent ie((*ii)->get_address(), (*ii)->get_size());
2206 for (std::set<BasicBlock*>::iterator bi=bblocks.begin(); bi!=bblocks.end(); ++bi) {
2207 (*bi)->code_likelihood = raw_vote;
2225 for (
size_t i=0; i<args.
ninsns; i++, va=next_va) {
2240 if (validate_targets) {
2244 if (!complete && 1!=succs.size())
2249 Functions::iterator fi = p->
functions.find(target_va);
2259 assert(1==bb->
insns.size());
2265 fprintf(p->
debug,
"Partitioner::FindThunks: found F%08"PRIx64
"\n", va);
2279 if (!padding_ranges) {
2287 if (padding_ranges->find(args.
range.
first()-1) == padding_ranges->end())
2294 if (next==padding_ranges->end())
2296 DataBlock *next_dblock = next->second.get();
2304 fprintf(p->
debug,
"Partitioner::FindInterPadFunctions: added F%08"PRIx64
"\n", new_func->
entry_va);
2316 while (!range.
empty()) {
2320 bool in_table =
false;
2322 for (va=range.
first(); va<=range.
last() && (in_table || thunks.empty()); va++) {
2323 if (begins_contiguously && !in_table && va>args.
range.
first())
2336 if (bb && bb->
insns.size()>1) {
2344 if (validate_targets) {
2348 if (bb->
insns.size()>1) {
2349 succs.insert(bb->
insns[1]->get_address());
2356 bool points_to_insn =
true;
2357 for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end() && points_to_insn; ++si)
2359 if (!points_to_insn) {
2372 if (thunks.size()>minimum_nthunks && (!ends_contiguously || va==args.
range.
last()+1)) {
2373 for (InstructionMap::iterator ii=thunks.begin(); ii!=thunks.end(); ++ii) {
2379 fprintf(p->
debug,
"Partitioner::FindThunkTable: thunk F%08"PRIx64
"\n", thunk->
entry_va);
2403 if (1!=bb->
insns.size())
2413 if (!complete || 1!=succs.size())
2417 Functions::iterator fi = functions.find(target_va);
2418 Function *target_func = fi==functions.end() ? NULL : fi->second;
2447 for (
size_t i=0; i<args.
ninsns; i++) {
2453 fprintf(p->
debug,
"Partitioner::PostFunctionBlocks: for F%08"PRIx64
": added", func->
entry_va);
2466 if (p->
debug && nadded)
2467 fprintf(p->
debug,
"\n");
2493 !x86InstructionIsUnconditionalBranch(insn) ||
2506 if (reg!=NULL && val!=NULL) {
2510 retval = value_of(val) +
offset;
2535 if (!gotplt || !gotplt->
is_mapped())
return;
2538 std::set<SgAsmElfRelocSection*> rsects;
2544 rsects.insert(reloc_section);
2546 if (rsects.empty())
return;
2549 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); fi++) {
2552 if (fi->second->name!=
"")
2569 assert(insn_x86!=NULL);
2577 for (std::set<SgAsmElfRelocSection*>::iterator ri=rsects.begin(); ri!=rsects.end() && fi->second->name==
""; ri++) {
2580 if (symbol_section) {
2582 for (
size_t ei=0; ei<entries->
get_entries().size() && fi->second->name==
""; ei++) {
2585 unsigned long symbol_idx = rel->
get_sym();
2586 assert(symbol_idx < symbols->get_symbols().size());
2614 typedef std::map<rose_addr_t, std::string> Index;
2622 void visit(
SgNode *node) {
2625 std::string
name =
import->get_name()->get_string();
2626 rose_addr_t va =
import->get_bound_rva().get_va();
2627 if (va!=0 && !name.empty())
2631 } imports(
this, fhdr);
2634 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
2636 if (!func->
name.empty())
2652 ImportIndexBuilder::iterator found = imports.index.find(base_va);
2653 if (found==imports.index.end())
2655 func->
name = found->second +
"@import";
2657 fprintf(debug,
"Partitioner::name_import_entries: F%08"PRIx64
": named \"%s\"\n", func->
entry_va, func->
name.c_str());
2666 for (
size_t i=0; i<iat_sections.size(); ++i) {
2667 if (-1==iat_sections[i]->get_id() && iat_sections[i]->is_mapped())
2668 pe_iat_extents.insert(
Extent(iat_sections[i]->get_mapped_actual_va(), iat_sections[i]->get_mapped_size()));
2677 fprintf(debug,
"Function reasons referenced by Partitioner debugging output:\n%s",
2681 mark_ipd_configuration();
2685 for (
size_t i=0; i<headers.size(); i++) {
2686 find_pe_iat_extents(headers[i]);
2688 mark_entry_targets(headers[i]);
2690 mark_eh_frames(headers[i]);
2692 mark_func_symbols(headers[i]);
2694 mark_elf_plt_entries(headers[i]);
2696 mark_export_entries(headers[i]);
2700 mark_func_patterns();
2710 for (
size_t i=0; i<user_detectors.size(); i++) {
2711 for (
size_t j=0; j<=headers.size(); j++) {
2713 user_detectors[i](
this, hdr);
2717 for (
size_t i=0; i<user_detectors.size(); i++) {
2718 user_detectors[i](
this, NULL);
2745 fprintf(debug,
"1st block %s F%08"PRIx64
" \"%s\": B%08"PRIx64
" ",
2756 if (debug) fprintf(debug,
"[split from B%08"PRIx64, bb->
address());
2758 if (debug) fprintf(debug,
" in F%08"PRIx64
" \"%s\"", bb->
address(), bb->
function->
name.c_str());
2761 if (debug) fprintf(debug,
"] ");
2763 bb = find_bb_containing(func->
entry_va);
2775 fprintf(debug,
"#%zu ", bb->
insns.size());
2777 fprintf(debug,
"no instruction at function entry address ");
2792 if (debug) fprintf(debug,
" B%08"PRIx64, va);
2800 Functions::iterator fi = functions.find(va);
2801 if (fi!=functions.end() && fi->second!=f) {
2802 if (debug) fprintf(debug,
"[entry \"%s\"]", fi->second->name.c_str());
2809 if (debug) fprintf(debug,
"#%zu", bb->
insns.size());
2815 if (debug) fprintf(debug,
" abandon");
2828 if (debug) fprintf(debug,
"[entry \"%s\"]", bb->
function->
name.c_str());
2865 if (debug) fprintf(debug,
" abandon");
2868 }
else if ((target_va=call_target(bb))!=NO_TARGET) {
2870 if (debug) fprintf(debug,
"[call F%08"PRIx64
"]", target_va);
2872 append(f, bb, reason);
2873 BasicBlock *target_bb = find_bb_containing(target_va);
2878 new_function = add_function(target_va, SgAsmFunction::FUNC_CALL_TARGET);
2879 }
else if (find_function(target_va)!=NULL) {
2888 (target_bb->
function!=f || new_function!=NULL))
2893 for (Disassembler::AddressSet::const_iterator si=suc.begin(); si!=suc.end(); ++si) {
2895 discover_blocks(f, *si, reason);
2899 append(f, bb, reason);
2901 for (Disassembler::AddressSet::const_iterator si=suc.begin(); si!=suc.end(); ++si) {
2903 discover_blocks(f, *si, reason);
2913 for (Disassembler::AddressSet::iterator hi=heads.begin(); hi!=heads.end(); ++hi)
2914 discover_blocks(f, *hi, reason);
2927 return pe_iat_extents.contains(
Extent(addr->get_absolute_value(), 4));
2933 return bb && bb->
insns.size()==1 && is_pe_dynlink_thunk(bb->
insns.front());
2945 for (
size_t pass=1;
true; pass++) {
2946 if (debug) fprintf(debug,
"\n========== Partitioner::analyze_cfg() pass %zu ==========\n", pass);
2947 progress(debug,
"Partitioner: starting %s pass %zu: "
2948 "%zu function%s, %zu insn%s, %zu block%s\n",
2950 functions.size(), 1==functions.size()?
"":
"s", insns.size(), 1==insns.size()?
"":
"s",
2951 basic_blocks.size(), 1==basic_blocks.size()?
"":
"s");
2954 for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi) {
2960 bool iscall = is_function_call(bb, &target_va);
2962 BasicBlock *return_bb = NULL, *target_bb = NULL;
2963 bool succs_complete;
2966 if (iscall && target_va!=NO_TARGET &&
2967 NULL!=(return_bb=find_bb_starting(return_va)) &&
2968 NULL!=(target_bb=find_bb_starting(target_va,
false)) &&
2969 target_bb->function && target_bb->function->possible_may_return()) {
2981 fprintf(debug,
" Function F%08"PRIx64
" may return by virtue of call fall-through at B%08"PRIx64
"\n",
2986 for (Disassembler::AddressSet::iterator si=succs.begin();
2990 target_bb = target_va!=0 ? find_bb_starting(target_va,
false) : NULL;
2991 if (target_bb && target_bb->function && target_bb->function!=bb->
function &&
2992 target_va==target_bb->function->
entry_va && target_bb->function->possible_may_return()) {
3006 fprintf(debug,
" F%08"PRIx64
" may return by virtue of branching to function F%08"PRIx64
3007 " which may return\n", bb->
function->
entry_va, target_bb->function->entry_va);
3016 fprintf(debug,
" F%08"PRIx64
" may return by virtue of incomplete successors\n",
3026 bool invalid_callee_va = !succs_complete;
3027 for (Disassembler::AddressSet::iterator si=succs.begin(); !invalid_callee_va && si!=succs.end(); ++si)
3028 invalid_callee_va = NULL==find_instruction(*si);
3029 if (invalid_callee_va) {
3037 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3041 fprintf(debug,
"%s F%08"PRIx64, might_now_return.empty()?
"newly returning functions:":
"", func->
entry_va);
3042 might_now_return.insert(func->
entry_va);
3046 if (debug && !might_now_return.empty())
3047 fprintf(debug,
"\n");
3055 if (!might_now_return.empty()) {
3056 for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi) {
3060 for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end(); ++si) {
3061 if (might_now_return.find(*si)!=might_now_return.end()) {
3066 BasicBlock *return_bb = find_bb_starting(return_va,
false);
3067 if (return_bb && return_bb->function!=bb->
function) {
3070 Function *called_func = find_function(*si);
3071 assert(called_func!=NULL);
3073 "newreturn %s F%08"PRIx64
" \"%s\" returns to B%08"PRIx64
" in F%08"PRIx64
"\n",
3085 std::vector<Function*> pending;
3086 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3087 assert(fi->second->entry_va==fi->first);
3088 if (fi->second->pending) {
3089 fi->second->clear_basic_blocks();
3090 fi->second->pending =
false;
3091 pending.push_back(fi->second);
3095 if (pending.size()==0) {
3102 for (
size_t i=0; i<pending.size(); ++i)
3103 discover_first_block(pending[i]);
3106 for (
size_t i=0; i<pending.size(); ++i) {
3108 fprintf(debug,
"analyzing %s F%08"PRIx64
" \"%s\" pass %zu: ",
3110 pending[i]->entry_va, pending[i]->
name.c_str(), pass);
3113 discover_blocks(pending[i], reason);
3119 pending[i]->show_properties(debug);
3131 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3132 while (detach_thunk(fi->second))
3145 if (NULL==entry_bb || entry_bb->
function!=func)
3157 if (entry_bb->
insns.size()>1) {
3158 second_va = entry_bb->
insns[1]->get_address();
3162 if (!complete || succs.size()!=1)
3164 second_va = *(succs.begin());
3168 if (
BasicBlock *target_bb = find_bb_containing(second_va)) {
3169 if (target_bb->function!=func)
3185 fprintf(debug,
"Partitioner::detach_thunk: detaching thunk F%08"PRIx64
" from body F%08"PRIx64
"\n",
3194 if (!func->
name.empty() && std::string::npos==func->
name.find(
"-thunk"))
3195 func->
name +=
"-thunk";
3199 func->
heads.clear();
3202 for (BasicBlocks::iterator bi=bblocks.begin(); bi!=bblocks.end(); ++bi) {
3204 BasicBlock *new_bb = find_bb_starting(second_va);
3205 assert(new_bb!=NULL);
3207 remove(func, new_bb);
3209 }
else if (new_bb->
function==new_func) {
3221 append(new_func, bb, bb->
reason);
3228 for (DataBlocks::iterator di=dblocks.begin(); di!=dblocks.end(); ++di) {
3230 remove(func, dblock);
3231 append(new_func, dblock, dblock->
reason);
3242 function_extent(&nonpadding_ranges);
3244 padding_extent(&padding_ranges);
3246 nonpadding_ranges.
erase(pi->first);
3252 if (npi==nonpadding_ranges.
end())
3254 Function *func = npi->second.get();
3255 if (func!=effective_function(padding))
3256 append(func, padding, padding->
reason,
true);
3266 typedef std::map<Function*, size_t> TravNumMap;
3267 TravNumMap traversal_number;
3268 std::vector<size_t> group_number;
3269 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3272 if (traversal_number.find(fi->second)!=traversal_number.end())
3275 size_t tnum = group_number.size();
3276 group_number.push_back(tnum);
3277 traversal_number[fi->second] = tnum;
3280 std::vector<Function*> dfs_functions;
3281 dfs_functions.push_back(fi->second);
3282 while (!dfs_functions.empty()) {
3283 Function *source_func = dfs_functions.back(); dfs_functions.pop_back();
3287 for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end(); ++si) {
3288 BasicBlock *target_bb = find_bb_starting(*si,
false);
3291 bool inserted = traversal_number.insert(std::make_pair(target_func, tnum)).second;
3293 dfs_functions.push_back(target_func);
3295 group_number[traversal_number[target_func]] = tnum;
3304 typedef std::vector<std::vector<Function*> > FragmentIndex;
3305 FragmentIndex fragment_index(group_number.size(), std::vector<Function*>());
3306 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3307 TravNumMap::iterator tn_found = traversal_number.find(fi->second);
3308 if (tn_found!=traversal_number.end()) {
3309 size_t gnum = group_number[tn_found->second];
3310 fragment_index[gnum].push_back(fi->second);
3316 std::vector<Function*> parent(fragment_index.size(), NULL);
3317 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3318 Function *source_func = fi->second;
3320 bool multi_parents =
false;
3321 for (BasicBlocks::iterator bi=source_func->
basic_blocks.begin();
3325 for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end() && !multi_parents; ++si) {
3326 BasicBlock *target_bb = find_bb_starting(*si,
false);
3328 TravNumMap::iterator tn_found = target_func ? traversal_number.find(target_func) : traversal_number.end();
3329 size_t gnum = tn_found!=traversal_number.end() ? group_number[tn_found->second] : (size_t)(-1);
3330 if (gnum!=(
size_t)(-1)) {
3333 parent[gnum] = NULL;
3334 fragment_index[gnum].clear();
3335 multi_parents =
true;
3337 parent[gnum] = source_func;
3347 fprintf(debug,
"Partitioner::merge_function_fragments...\n");
3348 for (
size_t gnum=0; gnum<fragment_index.size(); ++gnum) {
3349 if (parent[gnum]!=NULL && !fragment_index[gnum].empty()) {
3351 fprintf(debug,
"fragments %s F%08"PRIx64
" \"%s\" merging",
3353 parent[gnum]->entry_va, parent[gnum]->
name.c_str());
3355 for (std::vector<Function*>::iterator fi=fragment_index[gnum].begin(); fi!=fragment_index[gnum].end(); ++fi) {
3357 fprintf(debug,
" F0x%08"PRIx64, (*fi)->entry_va);
3358 merge_functions(parent[gnum], *fi); *fi = NULL;
3363 parent[gnum]->show_properties(debug);
3375 if (parent->
name.empty()) {
3377 }
else if (!other->
name.empty() && 0!=parent->
name.compare(other->
name)) {
3399 typedef std::map<rose_addr_t, std::string> NameMap;
3405 for (SgAsmGenericHeaderPtrList::const_iterator hi=hdrs.begin(); hi!=hdrs.end(); ++hi) {
3411 void visit(
SgNode *node) {
3413 std::string
name = import_item->get_name()->get_string();
3414 if (!name.empty() && !import_item->get_by_ordinal()) {
3417 rose_addr_t va = import_item->get_hintname_rva().get_va();
3425 NameMap::const_iterator found = names.find(va);
3426 return found==names.end() ? std::string() : found->second;
3435 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3437 if (is_pe_dynlink_thunk(func)) {
3439 if (func->
name.empty()) {
3443 if (complete && 1==succs.size())
3444 func->
name = names(*succs.begin());
3455 clear_aggregate_statistics();
3459 std::ostringstream s;
3460 s <<
"=== Mean ===\n" <<*mean <<
"\n"
3461 <<
"=== Variance ===\n" <<*variance <<
"\n";
3462 fputs(s.str().c_str(), debug);
3476 scan_unassigned_bytes(&fff, &exe_map);
3488 pattern.push_back(0x90);
3492 pattern.push_back(0xcc);
3498 scan_interfunc_bytes(&cb, &exe_map);
3508 scan_unassigned_bytes(&find_thunk_tables, &exe_map);
3509 for (
size_t npasses=0; npasses<5; ++npasses) {
3511 scan_unassigned_insns(&find_thunks);
3512 if (0==find_thunks.
nfound)
3520 scan_unassigned_bytes(&find_interpad_functions, &exe_map);
3524 if (func_heuristics & SgAsmFunction::FUNC_INTRABLOCK) {
3529 scan_unassigned_bytes(&fff, &exe_map);
3540 if (0!=(func_heuristics & SgAsmFunction::FUNC_THUNK) && detach_thunks()>0)
3544 if (func_heuristics & SgAsmFunction::FUNC_THUNK) {
3545 for (
size_t npasses=0; npasses<5; ++npasses) {
3547 scan_unassigned_insns(&find_thunks);
3548 if (0==find_thunks.
nfound)
3555 scan_unassigned_bytes(&find_data, &ro_map);
3562 merge_function_fragments();
3566 name_pe_dynlink_thunks(interp);
3568 for (
size_t i=0; i<headers.size(); i++) {
3569 name_plt_entries(headers[i]);
3570 name_import_entries(headers[i]);
3576 for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi) {
3584 "Partitioner completed: %zu function%s, %zu insn%s, %zu block%s\n",
3585 functions.size(), 1==functions.size()?
"":
"s", insns.size(), 1==insns.size()?
"":
"s",
3586 basic_blocks.size(), 1==basic_blocks.size()?
"":
"s");
3593 for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi)
3594 retval += function_extent(fi->second, extents);
3605 std::set<DataBlock*> my_dblocks;
3610 for (InstructionVector::iterator ii=bb->
insns.begin(); ii!=bb->
insns.end(); ++ii) {
3612 size_t size = (*ii)->get_size();
3615 hi_addr = start + size;
3617 lo_addr = std::min(lo_addr, start);
3618 hi_addr =
std::max(hi_addr, start + size);
3626 if (NULL==(*di)->function)
3627 my_dblocks.insert(*di);
3633 my_dblocks.insert(bi->second);
3636 for (std::set<DataBlock*>::iterator di=my_dblocks.begin(); di!=my_dblocks.end(); ++di) {
3639 DataRangeMap *data_extents_ptr = extents ? &data_extents : NULL;
3641 size_t n = datablock_extent(dblock, data_extents_ptr, &lo, &hi);
3647 lo_addr = std::min(lo_addr, lo);
3653 extents->
insert(di2->first, func);
3660 *lo_addr_ptr = lo_addr;
3662 *hi_addr_ptr = hi_addr;
3670 for (DataBlocks::const_iterator di=data_blocks.begin(); di!=data_blocks.end(); ++di) {
3673 datablock_extent(dblock, extents);
3684 for (DataBlocks::const_iterator di=data_blocks.begin(); di!=data_blocks.end(); ++di) {
3686 if (NULL!=effective_function(dblock)) {
3687 datablock_extent(dblock, extents);
3699 if (db->
nodes.empty()) {
3706 size_t size = db->
nodes.front()->get_size();
3708 *lo_addr_ptr = start;
3710 *hi_addr_ptr = start+size;
3715 for (
size_t i=1; i<db->
nodes.size(); i++) {
3721 *lo_addr_ptr = std::min(*lo_addr_ptr, start);
3723 *hi_addr_ptr =
std::max(*hi_addr_ptr, start+size);
3727 return db->
nodes.size();
3736 if (0==function_extent(func, &extents, &lo_addr, &hi_addr) || 1==extents.
size())
3743 InstructionMap::iterator ii = insns.
lower_bound(
std::max(lo_addr,max_insn_size)-max_insn_size);
3744 for (; ii!=insns.end() && ii->first<hi_addr; ++ii) {
3745 if (ii->first>=lo_addr) {
3746 BasicBlock *bb = find_bb_containing(ii->first,
false);
3754 for (DataBlocks::iterator dbi=data_blocks.begin(); dbi!=data_blocks.end(); ++dbi) {
3756 Function *block_func = effective_function(block);
3757 if (block_func!=NULL && block_func!=func) {
3758 for (
size_t i=0; i<block->
nodes.size(); i++) {
3759 if (block->
nodes[i]->get_address() < hi_addr &&
3760 block->
nodes[i]->get_address() + block->
nodes[i]->get_size() > lo_addr)
3773 typedef std::map<rose_addr_t, SgAsmBlock*> BlockMap;
3777 BlockMap *block_map;
3778 BlockMapBuilder(
SgNode *ast, BlockMap *block_map): block_map(block_map) {
3781 void visit(
SgNode *node) {
3787 block_map->insert(std::make_pair(insn->
get_address(), block));
3794 const BlockMap &block_map;
3795 TargetPopulator(
SgNode *ast,
const BlockMap &block_map): block_map(block_map) {
3798 void visit(
SgNode *node) {
3805 if (bi!=block_map.end())
3814 BlockMapBuilder(ast, &block_map);
3815 TargetPopulator(ast, block_map);
3831 : p(p), interp(interp), insn(NULL) {}
3833 void atTraversalStart() {
3837 for (SgAsmGenericHeaderPtrList::const_iterator hi=headers.begin(); hi!=headers.end(); ++hi) {
3838 if ((*hi)->is_mapped())
3839 mapped_sections.push_back(*hi);
3841 mapped_sections.insert(mapped_sections.end(), file_sections.begin(), file_sections.end());
3849 void preOrderVisit(
SgNode *node) {
3869 SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(target_insn->
node);
3880 if (dbi!=static_data.end()) {
3882 for (
size_t i=0; i<dblock->
nodes.size(); ++i) {
3901 void postOrderVisit(
SgNode *node) {
3907 FixerUpper(
this, interp).traverse(ast);
3926 function_extent(&existing);
3929 bool process_instructions;
3931 process_instructions =
false;
3933 for (InstructionMap::iterator ii=insns_copy.begin(); ii!=insns_copy.end(); ++ii) {
3935 size_t size = ii->second->get_size();
3937 BasicBlock *bb = find_bb_containing(ii->first);
3941 catchall = add_function(ii->first, SgAsmFunction::FUNC_LEFTOVERS,
"***uncategorized blocks***");
3943 process_instructions =
true;
3947 }
while (process_instructions);
3952 for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3954 if (!func_decl)
continue;
3963 functions.erase(catchall->
entry_va);
3968 fixup_cfg_edges(retval);
3969 fixup_pointers(retval, interp);
3979 fprintf(debug,
"function F%08"PRIx64
" \"%s\" has no basic blocks!\n", f->
entry_va, f->
name.c_str());
3985 typedef std::multimap<rose_addr_t, SgAsmStatement*> NodeMap;
3986 std::set<DataBlock*> my_data_blocks;
3991 if (!first_basic_block)
3992 first_basic_block = bblock;
3996 nodes.insert(std::make_pair(bblock->
address(), node));
3999 for (std::set<DataBlock*>::iterator di=bblock->
data_blocks.begin(); di!=bblock->
data_blocks.end(); ++di) {
4001 Function *dblock_func = effective_function(dblock);
4003 my_data_blocks.insert(dblock);
4010 my_data_blocks.insert(dblock);
4013 for (std::set<DataBlock*>::iterator di=my_data_blocks.begin(); di!=my_data_blocks.end(); ++di) {
4016 nodes.insert(std::make_pair(dblock->
address(), ast_block));
4033 for (NodeMap::iterator ni=nodes.begin(); ni!=nodes.end(); ++ni) {
4035 ni->second->set_parent(retval);
4038 unsigned reasons = f->
reason;
4041 function_extent(f, &extent);
4043 reasons |= SgAsmFunction::FUNC_DISCONT;
4059 for (InstructionVector::const_iterator ii=block->
insns.begin(); ii!=block->
insns.end(); ++ii) {
4070 for (Disassembler::AddressSet::iterator si=successor_addrs.begin(); si!=successor_addrs.end(); ++si) {
4088 for (std::vector<SgAsmStaticData*>::const_iterator ni=block->
nodes.begin(); ni!=block->
nodes.end(); ++ni) {
4090 assert(NULL==(*ni)->get_parent());
4091 (*ni)->set_parent(retval);
4100 disassembler = NULL;
4101 add_instructions(insns);
4105 if (!map && !old_map)
4115 retval = build_ast(interp);
4116 set_map(old_map, &old_ro_map);
4118 set_map(old_map, &old_ro_map);
4136 return build_ast(interp);
4142 for (Disassembler::InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
4144 this->insns.insert(std::make_pair(ii->first, insn));
4152 for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
4154 retval.insert(std::make_pair(ii->first, insn));
4163 assert(interp!=NULL);
4173 loader->
remap(interp);
4180 throw std::runtime_error(
"no valid disassembler for this interpretation");
4181 disassembler = disassembler->
clone();
4182 SgFile *file = SageInterface::getEnclosingNode<SgFile>(interp);
4194 for (SgAsmGenericHeaderPtrList::const_iterator hi=headers.begin(); hi!=headers.end(); ++hi) {
4195 SgRVAList entry_rvalist = (*hi)->get_entry_rvas();
4196 for (
size_t i=0; i<entry_rvalist.size(); ++i) {
4197 rose_addr_t entry_va = (*hi)->get_base_va() + entry_rvalist[i].get_rva();
4198 worklist.insert(entry_va);
4214 block->set_parent(interp);
4218 delete disassembler;
4228 if (insns.size()>0) {
4230 bb_starts[insn_va] = BasicBlockStarts::mapped_type();
4233 for (Disassembler::InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
4244 Disassembler::InstructionMap::const_iterator found = insns.find(next_va);
4245 if (found!=insns.end()) {
4254 }
else if (bb_starts.find(next_va)==bb_starts.end()) {
4255 bb_starts[next_va] = BasicBlockStarts::mapped_type();
4265 for (Disassembler::AddressSet::const_iterator si=successors.begin(); si!=successors.end(); ++si) {
4267 if ((successor_va != next_va || successors.size()>1) && insns.find(successor_va)!=insns.end())
4268 bb_starts[successor_va].insert(insn_va);
4280 for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi)
4281 retval.insert(std::make_pair(fi->first,
FunctionStart(fi->second->reason, fi->second->name)));