ROSE  0.9.6a
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ElfFileHeader.C
Go to the documentation of this file.
1 /* ELF File Header (SgAsmElfFileHeader and related classes) */
2 
3 #include "sage3basic.h"
4 
/* Gives a new ELF file header its default state: asserts that the header has a file and a non-zero size, names it
 * "ELF File Header", marks it synthesized, stores the four ELF magic bytes (0x7f 'E' 'L' 'F') and sets the data encoding
 * to 1 (LSB).
 * NOTE(review): the signature line and several body lines are absent from this listing (the embedded line numbers skip
 * 11, 18, 29-36, 38 and 40); presumably this is the constructor helper -- confirm against the real source. */
10 void
12 {
/* Preconditions: the header must already belong to a file and have a non-zero size. */
13  ROSE_ASSERT(get_file()!=NULL);
14  ROSE_ASSERT(get_size()>0);
15 
16  set_name(new SgAsmBasicString("ELF File Header"));
17  set_synthesized(true);
19 
20  /* Magic number */
21  p_magic.clear();
22  p_magic.push_back(0x7f);
23  p_magic.push_back('E');
24  p_magic.push_back('L');
25  p_magic.push_back('F');
26 
27  /* Executable Format */
28  ROSE_ASSERT(p_exec_format!=NULL);
37 
/* Default data encoding; 1 is the value that the parser in this file maps to ByteOrder::ORDER_LSB. */
39  p_e_ident_data_encoding = 1; /*LSB*/
41 }
42 
/* Tests whether `file` starts with the ELF magic number. Reads the first four bytes and returns true only when they are
 * 0x7f 'E' 'L' 'F'. Returns false on a mismatch or when reading throws any exception. The file's reference-tracking
 * setting is restored to its previous value on both paths.
 * NOTE(review): the signature line (embedded line 45) is absent from this listing; the body uses a pointer-like
 * parameter named `file`. */
44 bool
46 {
47  /* Turn off byte reference tracking for the duration of this function. We don't want our testing the file contents to
48  * affect the list of bytes that we've already referenced or which we might reference later. */
49  bool was_tracking = file->get_tracking_references();
50  file->set_tracking_references(false);
51 
52  try {
53  unsigned char magic[4];
54  file->read_content(0, magic, sizeof magic);
55  if (0x7f!=magic[0] || 'E'!=magic[1] || 'L'!=magic[2] || 'F'!=magic[3])
/* A mismatch is routed through the same handler as a read failure so that tracking is restored in one place. */
56  throw 1;
57  } catch (...) {
58  file->set_tracking_references(was_tracking);
59  return false;
60  }
61  file->set_tracking_references(was_tracking);
62  return true;
63 }
64 
/* Maps an ELF machine number to the corresponding ISA_* constant.
 *
 * machine: the ELF e_machine value to translate. The lookup uses this argument and not the header's own p_e_machine
 *          member, so any machine number can be translated.
 *
 * Returns the matching ISA_* constant. For a machine number that is not in the table, a warning is printed to stderr
 * and ISA_OTHER is returned.
 * NOTE(review): the return-type line (embedded line 66) is absent from this listing. */
67 SgAsmElfFileHeader::machine_to_isa(unsigned machine) const
68 {
69  switch (machine) { /* These come from the Portable Formats Specification v1.1 */
70  case 0: return ISA_UNSPECIFIED;
71  case 1: return ISA_ATT_WE_32100;
72  case 2: return ISA_SPARC_Family;
73  case 3: return ISA_IA32_386;
74  case 4: return ISA_M68K_Family;
75  case 5: return ISA_M88K_Family;
76  case 7: return ISA_I860_Family;
77  case 8: return ISA_MIPS_Family;
78  case 20:
79  // Note that PowerPC has: p_e_machine = 20 = 0x14, using both gcc on BGL and xlc on BGL.
80  // However, these don't seem like correct values for PowerPC.
81  return ISA_PowerPC;
82  case 40: return ISA_ARM_Family;
83  case 62: return ISA_X8664_Family;
84  default:
85  /*FIXME: There's a whole lot more. See Dan's Elf reader. */
86  // DQ (10/12/2008): Need more information to address PowerPC support.
/* Report the value that failed the lookup (the argument); the message text itself is unchanged. */
87  fprintf(stderr, "Warning: SgAsmElfFileHeader::parse::p_e_machine = 0x%x (%u)\n", machine, machine);
88  return ISA_OTHER;
89  }
90 }
91 
/* Maps an ISA_* constant (`isa`) to an ELF machine number. Exact matches are tried first. Otherwise the value is
 * masked with ISA_FAMILY_MASK and matched by family. ISA_UNSPECIFIED, ISA_OTHER and any unrecognized family return the
 * header's current p_e_machine unchanged. The numbers are the same ones used by the cases of machine_to_isa().
 * NOTE(review): the signature line (embedded line 94) is absent from this listing. */
93 unsigned
95 {
96  switch (isa) {
97  case ISA_UNSPECIFIED:
98  case ISA_OTHER: return p_e_machine;
99  case ISA_ATT_WE_32100: return 1;
100  case ISA_IA32_386: return 3;
101  case ISA_PowerPC: return 20; /*see note in machine_to_isa()*/
102  default:
/* No exact match: fall back to comparing only the family bits of the ISA value. */
103  switch (isa & ISA_FAMILY_MASK) {
104  case ISA_SPARC_Family: return 2;
105  case ISA_M68K_Family: return 4;
106  case ISA_M88K_Family: return 5;
107  case ISA_I860_Family: return 7;
108  case ISA_MIPS_Family: return 8;
109  case ISA_ARM_Family: return 40;
110  case ISA_X8664_Family: return 62;
111  default:
112  return p_e_machine;
113  }
114  }
115 }
116 
/* Parses the ELF file header from the file. Visible steps, in order:
 *   1. read the header as a 32-bit structure (extending this section if it is too small) and check the magic number;
 *   2. determine the byte order from e_ident_data_encoding, or infer it from e_type when that field is neither 1 nor 2;
 *   3. decode the 32-bit fields, or re-read and decode as a 64-bit structure when the file class is 2;
 *   4. record the magic bytes and set the ABI to ABI_UNSPECIFIED;
 *   5. parse the section and segment tables when their offsets and entry counts are non-zero;
 *   6. bind the entry address to this header and register it.
 * Returns `this`. Throws FormatError for a bad magic number, an undeterminable byte order, or an invalid file class.
 * NOTE(review): the signature and many statements are absent from this listing (the embedded line numbers skip), for
 * example the declaration of `sex`, most field decodes, the bodies of the switch on p_e_type and the construction of
 * `tab`. Only the statements shown here are described. */
123 {
125 
126  /* Read 32-bit header for now. Might need to re-read as 64-bit later. */
127  Elf32FileHeader_disk disk32;
128  if (sizeof(disk32)>get_size())
129  extend(sizeof(disk32)-get_size());
130  read_content_local(0, &disk32, sizeof disk32, false); /*zero pad if we read EOF*/
131 
132  /* Check magic number early */
133  if (disk32.e_ident_magic[0]!=0x7f || disk32.e_ident_magic[1]!='E' ||
134  disk32.e_ident_magic[2]!='L' || disk32.e_ident_magic[3]!='F')
135  throw FormatError("Bad ELF magic number");
136 
137  /* File byte order should be 1 or 2. However, we've seen at least one example that left the byte order at zero, implying
138  * that it was the native order. We don't have the luxury of decoding the file on the native machine, so in that case we
139  * try to infer the byte order by looking at one of the other multi-byte fields of the file. */
141  if (1 == disk32.e_ident_data_encoding) {
142  sex = ByteOrder::ORDER_LSB;
143  } else if (2==disk32.e_ident_data_encoding) {
144  sex = ByteOrder::ORDER_MSB;
145  } else if ((disk32.e_type & 0xff00)==0xff00) {
146  /* One of the 0xffxx processor-specific flags in native order */
147  if ((disk32.e_type & 0x00ff)==0xff)
148  throw FormatError("invalid ELF header byte order"); /*ambiguous*/
149  sex = ByteOrder::host_order();
150  } else if ((disk32.e_type & 0x00ff)==0x00ff) {
151  /* One of the 0xffxx processor specific orders in reverse native order */
/* NOTE(review): the statement for this branch (embedded line 152) is not visible in this listing. */
153  } else if ((disk32.e_type & 0xff00)==0) {
154  /* One of the low-valued file types in native order */
155  if ((disk32.e_type & 0x00ff)==0)
156  throw FormatError("invalid ELF header byte order"); /*ambiguous*/
157  sex = ByteOrder::host_order();
158  } else if ((disk32.e_type & 0x00ff)==0) {
159  /* One of the low-valued file types in reverse native order */
/* NOTE(review): the statement for this branch (embedded line 160) is not visible in this listing. */
161  } else {
162  /* Ambiguous order */
163  throw FormatError("invalid ELF header byte order");
164  }
165  ROSE_ASSERT(p_exec_format != NULL);
166  p_exec_format->set_sex(sex);
167  p_e_ident_data_encoding = disk32.e_ident_data_encoding; /*save original value*/
168 
169  /* Decode header to native format */
170  rose_rva_t entry_rva, sectab_rva, segtab_rva;
171  if (1 == disk32.e_ident_file_class) {
173 
174  p_e_ident_padding.clear();
175  for (size_t i=0; i<sizeof(disk32.e_ident_padding); i++)
176  p_e_ident_padding.push_back(disk32.e_ident_padding[i]);
177 
180  p_e_type = ByteOrder::disk_to_host(sex, disk32.e_type);
183  entry_rva = ByteOrder::disk_to_host(sex, disk32.e_entry);
184  segtab_rva = ByteOrder::disk_to_host(sex, disk32.e_phoff);
185  sectab_rva = ByteOrder::disk_to_host(sex, disk32.e_shoff);
188 
/* Only the else-branches of the next two conditionals are visible: with no program (or section) header entries the
 * corresponding "extra size" member is set to zero. */
190  if (p_e_phnum>0) {
194  } else {
195  p_phextrasz = 0;
196  }
197 
199  if (p_e_shnum>0) {
203  } else {
204  p_shextrasz = 0;
205  }
206 
208  } else if (2 == disk32.e_ident_file_class) {
209  /* We guessed wrong. This is a 64-bit header, not 32-bit. */
211  Elf64FileHeader_disk disk64;
212  if (sizeof(disk64)>get_size())
213  extend(sizeof(disk64)-get_size());
214  read_content_local(0, &disk64, sizeof disk64, false); /*zero pad at EOF*/
215 
216  p_e_ident_padding.clear();
217  for (size_t i=0; i<sizeof(disk64.e_ident_padding); i++)
218  p_e_ident_padding.push_back(disk64.e_ident_padding[i]);
219 
222  p_e_type = ByteOrder::disk_to_host(sex, disk64.e_type);
225  entry_rva = ByteOrder::disk_to_host(sex, disk64.e_entry);
226  segtab_rva = ByteOrder::disk_to_host(sex, disk64.e_phoff);
227  sectab_rva = ByteOrder::disk_to_host(sex, disk64.e_shoff);
230 
232  if (p_e_phnum>0) {
236  } else {
237  p_phextrasz = 0;
238  }
239 
241  if (p_e_shnum>0) {
245  } else {
246  p_shextrasz = 0;
247  }
248 
250  } else {
251  throw FormatError("invalid ELF header file class");
252  }
253 
254  /* Magic number. disk32 and disk64 have header bytes at same offset */
255  p_magic.clear();
256  for (size_t i=0; i<sizeof(disk32.e_ident_magic); i++)
257  p_magic.push_back(disk32.e_ident_magic[i]);
258 
259  /* File format */
/* NOTE(review): the statements inside each case below are not visible in this listing; only the case structure is. */
261  switch (p_e_type) {
262  case 0:
264  break;
265  case 1:
266  case 3:
268  break;
269  case 2:
271  break;
272  case 4:
274  break;
275  default:
276  if (p_e_type >= 0xff00 && p_e_type <= 0xffff) {
278  } else {
280  }
281  break;
282  }
284  p_exec_format->set_abi(ABI_UNSPECIFIED); /* ELF specifies a target architecture rather than an ABI */
286 
287  /* Target architecture */
289 
290  /* Read the optional section and segment tables and the sections to which they point. An empty section or segment table is
291  * treated as if it doesn't exist. This seems to be compatible with the loader since the 45-byte "tiny" ELF executable
292  * stores a zero in the e_shnum member and a completely invalid value in the e_shoff member. */
293  if (sectab_rva>0 && get_e_shnum()>0) {
295  tab->set_offset(sectab_rva.get_rva());
296  tab->parse();
297  }
298  if (segtab_rva>0 && get_e_phnum()>0) {
300  tab->set_offset(segtab_rva.get_rva());
301  tab->parse();
302  }
303 
304  /* Associate the entry point with a particular section. */
305  entry_rva.bind(this);
306  add_entry_rva(entry_rva);
307 
308  return this;
309 }
310 
/* Returns the maximum page size in bytes. The value is currently a fixed 4096 (4*1024) regardless of the target; see the
 * FIXME below.
 * NOTE(review): the signature line (embedded line 315) is absent from this listing. */
314 uint64_t
316 {
317  /* FIXME:
318  * System V max page size is 4k.
319  * IA32 is 4k
320  * x86_64 is 2MB
321  * Other systems may vary! */
322  return 4*1024;
323 }
324 
/* Collects into `retval`, in order, every element of `sections` that is an SgAsmElfSection with a non-null section
 * entry, and returns `retval`.
 * NOTE(review): the signature and the declarations of `retval` and `sections` (embedded lines 326-330) are absent from
 * this listing. */
328 {
331  for (size_t i=0; i<sections.size(); i++) {
/* dynamic_cast yields NULL for elements that are not ELF sections; those are skipped. */
332  SgAsmElfSection *elfsec = dynamic_cast<SgAsmElfSection*>(sections[i]);
333  if (elfsec && elfsec->get_section_entry()!=NULL)
334  retval.push_back(elfsec);
335  }
336  return retval;
337 }
338 
/* Collects into `retval`, in order, every element of `sections` that is an SgAsmElfSection with a non-null segment
 * entry, and returns `retval`.
 * NOTE(review): the signature and the declarations of `retval` and `sections` (embedded lines 340-344) are absent from
 * this listing. */
342 {
345  for (size_t i=0; i<sections.size(); i++) {
/* dynamic_cast yields NULL for elements that are not ELF sections; those are skipped. */
346  SgAsmElfSection *elfsec = dynamic_cast<SgAsmElfSection*>(sections[i]);
347  if (elfsec && elfsec->get_segment_entry()!=NULL)
348  retval.push_back(elfsec);
349  }
350  return retval;
351 }
352 
/* Encodes this header's members into the on-disk structure `disk`, converting multi-byte fields to byte order `sex`,
 * and returns `disk`. Asserts that p_magic and p_e_ident_padding have exactly as many elements as the corresponding
 * disk arrays. The program-header and section-header offsets are written as zero when there is no segment or section
 * table. The entry sizes are written as zero when the corresponding entry count is zero.
 * NOTE(review): the signature and several statements are absent from this listing (the embedded line numbers skip).
 * This is presumably the overload for the 32-bit disk structure, since unparse() calls a 32-bit and a 64-bit form --
 * confirm. */
354 void *
356 {
357  ROSE_ASSERT(p_magic.size() == NELMTS(disk->e_ident_magic));
358  for (size_t i=0; i<NELMTS(disk->e_ident_magic); i++)
359  disk->e_ident_magic[i] = p_magic[i];
363  ROSE_ASSERT(p_e_ident_padding.size() == NELMTS(disk->e_ident_padding));
364  for (size_t i=0; i<NELMTS(disk->e_ident_padding); i++)
365  disk->e_ident_padding[i] = p_e_ident_padding[i];
366  ByteOrder::host_to_disk(sex, p_e_type, &(disk->e_type));
/* Table offsets: the branches taken when a table exists are not visible in this listing. */
370  if (get_segment_table()) {
372  } else {
373  ByteOrder::host_to_disk(sex, 0, &(disk->e_phoff));
374  }
375  if (get_section_table()) {
377  } else {
378  ByteOrder::host_to_disk(sex, 0, &(disk->e_shoff));
379  }
380  ByteOrder::host_to_disk(sex, p_e_flags, &(disk->e_flags));
382 
/* Entry sizes: only the tail of each non-zero-count call is visible (embedded lines 384 and 390 are missing). */
383  if (p_e_phnum>0) {
385  &(disk->e_phentsize));
386  } else {
387  ByteOrder::host_to_disk(sex, 0, &(disk->e_phentsize));
388  }
389  if (p_e_shnum>0) {
391  &(disk->e_shentsize));
392  } else {
393  ByteOrder::host_to_disk(sex, 0, &(disk->e_shentsize));
394  }
395  ByteOrder::host_to_disk(sex, p_e_phnum, &(disk->e_phnum));
396  ByteOrder::host_to_disk(sex, p_e_shnum, &(disk->e_shnum));
398 
399  return disk;
400 }
/* Encodes this header's members into the on-disk structure `disk`, converting multi-byte fields to byte order `sex`,
 * and returns `disk`. Asserts that p_magic and p_e_ident_padding have exactly as many elements as the corresponding
 * disk arrays. The program-header and section-header offsets are written as zero when there is no segment or section
 * table.
 * NOTE(review): the signature and several statements are absent from this listing (the embedded line numbers skip).
 * This is presumably the overload for the 64-bit disk structure -- confirm. */
401 void *
403 {
404  ROSE_ASSERT(p_magic.size() == NELMTS(disk->e_ident_magic));
405  for (size_t i=0; i < NELMTS(disk->e_ident_magic); i++)
406  disk->e_ident_magic[i] = p_magic[i];
410  ROSE_ASSERT(p_e_ident_padding.size() == NELMTS(disk->e_ident_padding));
411  for (size_t i=0; i<NELMTS(disk->e_ident_padding); i++)
412  disk->e_ident_padding[i] = p_e_ident_padding[i];
413  ByteOrder::host_to_disk(sex, p_e_type, &(disk->e_type));
/* Table offsets: the branches taken when a table exists are not visible in this listing. */
417  if (get_segment_table()) {
419  } else {
420  ByteOrder::host_to_disk(sex, 0, &(disk->e_phoff));
421  }
422  if (get_section_table()) {
424  } else {
425  ByteOrder::host_to_disk(sex, 0, &(disk->e_shoff));
426  }
427  ByteOrder::host_to_disk(sex, p_e_flags, &(disk->e_flags));
/* NOTE(review): unlike the other encode() in this file, no p_e_phnum>0 / p_e_shnum>0 guard is visible around the
 * entry-size writes below -- confirm whether the difference is intentional. */
430  &(disk->e_phentsize));
431  ByteOrder::host_to_disk(sex, p_e_phnum, &(disk->e_phnum));
433  &(disk->e_shentsize));
434  ByteOrder::host_to_disk(sex, p_e_shnum, &(disk->e_shnum));
436 
437  return disk;
438 }
439 
/* Brings the ELF-specific header members back in sync with the generic header data before output. Visible steps:
 *   1. call SgAsmGenericHeader::reallocate();
 *   2. size the header to sizeof(Elf32FileHeader_disk) or sizeof(Elf64FileHeader_disk) according to the word size
 *      (4 or 8), throwing FormatError for any other word size;
 *   3. update p_e_type from the executable format's purpose;
 *   4. store the header's size in p_e_ehsize.
 * Returns true if the superclass reported a reallocation or the header size had to change.
 * NOTE(review): the signature and several statements are absent from this listing (the embedded line numbers skip),
 * including the grow branch, the file-class assignments, the byte-order update and the machine-type update. */
441 bool
443 {
444  /* Reallocate superclass. This also calls reallocate() for all the sections associated with this ELF File Header. */
445  bool reallocated = SgAsmGenericHeader::reallocate();
446 
447  /* Resize header based on current word size */
448  rose_addr_t need;
449  if (4==get_word_size()) {
450  need = sizeof(Elf32FileHeader_disk);
451  } else if (8==get_word_size()) {
452  need = sizeof(Elf64FileHeader_disk);
453  } else {
454  throw FormatError("unsupported ELF word size");
455  }
456  if (need < get_size()) {
/* Shrinking: when the header is mapped, the mapped size is reduced together with the file size. */
457  if (is_mapped()) {
458  ROSE_ASSERT(get_mapped_size()==get_size());
459  set_mapped_size(need);
460  }
461  set_size(need);
462  reallocated = true;
463  } else if (need > get_size()) {
/* NOTE(review): the statement that grows the header (embedded line 464) is not visible in this listing. */
465  reallocated = true;
466  }
467 
468  /* Update ELF-specific file class data member from generic data. */
469  switch(get_word_size()) {
470  case 4:
472  break;
473  case 8:
475  break;
476  default:
477  ROSE_ASSERT(!"invalid word size");
478  break;
479  }
480 
481  /* Byte order. According to the spec, valid values are 1 (little-endian) and 2 (big-endian). However, we've seen cases
482  * where a value of zero is used to indicate "native" order (loader assumes words are in the order of the machine on which
483  * the loader is running, and the ROSE ELF parser determines the order by looking at other fields in the header). Any
484  * original value other than 1 or 2 will be written to the new output; otherwise we choose 1 or 2 based on the currently
485  * defined byte order. */
/* NOTE(review): the conditional that the next brace closes (embedded lines 486-487) is not visible in this listing. */
488  }
489 
490  /* Update ELF-specific file type from generic data. */
491  switch (p_exec_format->get_purpose()) {
492  case PURPOSE_UNSPECIFIED:
494  case PURPOSE_OS_SPECIFIC:
495  case PURPOSE_OTHER:
496  /* keep as is */
497  break;
498  case PURPOSE_LIBRARY:
/* Both 1 and 3 are accepted for a library, so an existing value of either kind is preserved. */
499  if (p_e_type==1 || p_e_type==3) {
500  /* keep as is */
501  } else {
502  p_e_type = 1;
503  }
504  break;
505  case PURPOSE_EXECUTABLE:
506  p_e_type = 2;
507  break;
508  case PURPOSE_CORE_DUMP:
509  p_e_type = 4;
510  }
511 
512  /* Update ELF machine type. */
514 
515  /* The ELF header stores its own size */
516  p_e_ehsize = get_size();
517 
518  return reallocated;
519 }
520 
/* Writes this ELF header to the output stream `f`. The unreferenced areas ("holes") are written first, then the
 * segment table and section table blocks run, and finally the header is encoded as a 32-bit or 64-bit disk structure
 * (word size 4 or 8) and written at offset 0.
 * NOTE(review): the statements that actually unparse the two tables (embedded lines 532 and 538) are not visible in
 * this listing; only the ownership assertions are. */
522 void
523 SgAsmElfFileHeader::unparse(std::ostream &f) const
524 {
525  /* Write unreferenced areas back to the file before anything else. */
526  unparse_holes(f);
527 
528  /* Write the ELF segment table and segments first since they generally overlap with more specific things which may have
529  * been modified when walking the AST. (We generally don't modify segments, just the more specific sections.) */
530  if (p_segment_table) {
531  ROSE_ASSERT(p_segment_table->get_header()==this);
533  }
534 
535  /* Write the ELF section table and, indirectly, the sections themselves. */
536  if (p_section_table) {
537  ROSE_ASSERT(p_section_table->get_header()==this);
539  }
540 
541  /* Encode and write the ELF file header */
542  Elf32FileHeader_disk disk32;
543  Elf64FileHeader_disk disk64;
544  void *disk = NULL;
545  size_t struct_size = 0;
546  if (4 == get_word_size()) {
547  disk = encode(get_sex(), &disk32);
548  struct_size = sizeof(disk32);
549  } else if (8 == get_word_size()) {
550  disk = encode(get_sex(), &disk64);
551  struct_size = sizeof(disk64);
552  } else {
/* NOTE(review): if ROSE_ASSERT does not abort here, the write() below is reached with disk==NULL and
 * struct_size==0 -- confirm that this is acceptable. */
553  ROSE_ASSERT(!"unsupported word size");
554  }
555  write(f, 0, struct_size, disk);
556 }
557 
/* Prints a human-readable description of this header to `f`, one field per line.
 *
 * f:      output stream.
 * prefix: text placed at the start of every line.
 * idx:    when non-negative, printed as an index after the "ElfFileHeader" label; otherwise no index is printed.
 *
 * The generic header fields are printed first through SgAsmGenericHeader::dump(), then the ELF-specific members. A hex
 * dump of p_data follows only when variantT() is V_SgAsmElfFileHeader.
 * NOTE(review): the trailing arguments of the two table fprintf() calls (embedded lines 588 and 594) are not visible in
 * this listing. */
559 void
560 SgAsmElfFileHeader::dump(FILE *f, const char *prefix, ssize_t idx) const
561 {
/* NOTE(review): sprintf() into this fixed 4096-byte buffer is not bounded by the length of `prefix`; a bounded
 * formatter such as snprintf() would rule out overflow -- confirm the callers' prefix lengths. */
562  char p[4096];
563  if (idx>=0) {
564  sprintf(p, "%sElfFileHeader[%zd].", prefix, idx);
565  } else {
566  sprintf(p, "%sElfFileHeader.", prefix);
567  }
/* Field-name column width: what remains of DUMP_FIELD_WIDTH after the line prefix, but never less than 1. */
568  int w = std::max(1, DUMP_FIELD_WIDTH-(int)strlen(p));
569 
570  SgAsmGenericHeader::dump(f, p, -1);
/* Annotate the file class with its word size when it is one of the two values tested (1 or 2). */
571  const char *class_s = 1==p_e_ident_file_class ? " (32-bit)" :
572  2==p_e_ident_file_class ? " (64-bit)" : "";
573  fprintf(f, "%s%-*s = %u%s\n", p, w, "e_ident_file_class", p_e_ident_file_class, class_s);
574  fprintf(f, "%s%-*s = %u\n", p, w, "e_ident_file_version", p_e_ident_file_version);
575  for (size_t i=0; i < p_e_ident_padding.size(); i++)
576  fprintf(f, "%s%-*s = [%zu] %u\n", p, w, "e_ident_padding", i, p_e_ident_padding[i]);
577  fprintf(f, "%s%-*s = %lu\n", p, w, "e_type", p_e_type);
578  fprintf(f, "%s%-*s = %lu\n", p, w, "e_machine", p_e_machine);
579  fprintf(f, "%s%-*s = 0x%08lx\n", p, w, "e_flags", p_e_flags);
580  fprintf(f, "%s%-*s = 0x%08lx (%lu) bytes\n", p, w, "e_ehsize", p_e_ehsize, p_e_ehsize);
581  fprintf(f, "%s%-*s = 0x%08lx (%lu) bytes\n", p, w, "phextrasz", p_phextrasz, p_phextrasz);
582  fprintf(f, "%s%-*s = %lu\n", p, w, "e_phnum", p_e_phnum);
583  fprintf(f, "%s%-*s = 0x%08lx (%lu) bytes\n", p, w, "shextrasz", p_shextrasz, p_shextrasz);
584  fprintf(f, "%s%-*s = %lu\n", p, w, "e_shnum", p_e_shnum);
585  fprintf(f, "%s%-*s = %lu\n", p, w, "e_shstrndx", p_e_shstrndx);
586  if (p_section_table) {
587  fprintf(f, "%s%-*s = [%d] \"%s\"\n", p, w, "section_table",
589  } else {
590  fprintf(f, "%s%-*s = none\n", p, w, "section_table");
591  }
592  if (p_segment_table) {
593  fprintf(f, "%s%-*s = [%d] \"%s\"\n", p, w, "segment_table",
595  } else {
596  fprintf(f, "%s%-*s = none\n", p, w, "segment_table");
597  }
598 
599  if (variantT() == V_SgAsmElfFileHeader) //unless a base class
600  hexdump(f, 0, std::string(p)+"data at ", p_data);
601 }