| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "courgette/disassembler_win32_x64.h" | 5 #include "courgette/disassembler_win32_x64.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| 11 #include <string> | |
| 12 #include <vector> | |
| 13 | 11 |
| 14 #include "base/logging.h" | 12 #include "base/logging.h" |
| 15 #include "base/numerics/safe_conversions.h" | 13 #include "base/numerics/safe_conversions.h" |
| 16 | |
| 17 #include "courgette/assembly_program.h" | 14 #include "courgette/assembly_program.h" |
| 18 #include "courgette/courgette.h" | 15 #include "courgette/courgette.h" |
| 19 #include "courgette/encoded_program.h" | 16 |
| 17 #if COURGETTE_HISTOGRAM_TARGETS |
| 18 #include <iostream> |
| 19 #endif |
| 20 | 20 |
| 21 namespace courgette { | 21 namespace courgette { |
| 22 | 22 |
| 23 DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) | 23 DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) |
| 24 : Disassembler(start, length), | 24 : Disassembler(start, length), |
| 25 incomplete_disassembly_(false), | 25 incomplete_disassembly_(false), |
| 26 is_PE32_plus_(false), | 26 is_PE32_plus_(false), |
| 27 optional_header_(NULL), | 27 optional_header_(nullptr), |
| 28 size_of_optional_header_(0), | 28 size_of_optional_header_(0), |
| 29 offset_of_data_directories_(0), | 29 offset_of_data_directories_(0), |
| 30 machine_type_(0), | 30 machine_type_(0), |
| 31 number_of_sections_(0), | 31 number_of_sections_(0), |
| 32 sections_(NULL), | 32 sections_(nullptr), |
| 33 has_text_section_(false), | 33 has_text_section_(false), |
| 34 size_of_code_(0), | 34 size_of_code_(0), |
| 35 size_of_initialized_data_(0), | 35 size_of_initialized_data_(0), |
| 36 size_of_uninitialized_data_(0), | 36 size_of_uninitialized_data_(0), |
| 37 base_of_code_(0), | 37 base_of_code_(0), |
| 38 base_of_data_(0), | 38 base_of_data_(0), |
| 39 image_base_(0), | 39 image_base_(0), |
| 40 size_of_image_(0), | 40 size_of_image_(0), |
| 41 number_of_data_directories_(0) { | 41 number_of_data_directories_(0) { |
| 42 } |
| 43 |
| 44 FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { |
| 45 const Section* section = RVAToSection(rva); |
| 46 if (section != nullptr) { |
| 47 FileOffset offset_in_section = rva - section->virtual_address; |
| 48 // Need this extra check, since an |rva| may be valid for a section, but is |
| 49 // non-existent in an image (e.g. uninit data). |
| 50 if (offset_in_section >= section->size_of_raw_data) |
| 51 return kNoFileOffset; |
| 52 |
| 53 return static_cast<FileOffset>(section->file_offset_of_raw_data + |
| 54 offset_in_section); |
| 55 } |
| 56 |
| 57 // Small RVA values point into the file header in the loaded image. |
| 58 // RVA 0 is the module load address which Windows uses as the module handle. |
| 59 // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the |
| 60 // DOS header. |
| 61 if (rva == 0 || rva == 2) |
| 62 return static_cast<FileOffset>(rva); |
| 63 |
| 64 NOTREACHED(); |
| 65 return kNoFileOffset; |
| 66 } |
| 67 |
| 68 RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { |
| 69 for (int i = 0; i < number_of_sections_; ++i) { |
| 70 const Section* section = &sections_[i]; |
| 71 if (file_offset >= section->file_offset_of_raw_data) { |
| 72 FileOffset offset_in_section = |
| 73 file_offset - section->file_offset_of_raw_data; |
| 74 if (offset_in_section < section->size_of_raw_data) |
| 75 return static_cast<RVA>(section->virtual_address + offset_in_section); |
| 76 } |
| 77 } |
| 78 |
| 79 NOTREACHED(); |
| 80 return kNoRVA; |
| 42 } | 81 } |
| 43 | 82 |
| 44 // ParseHeader attempts to match up the buffer with the Windows data | 83 // ParseHeader attempts to match up the buffer with the Windows data |
| 45 // structures that exist within a Windows 'Portable Executable' format file. | 84 // structures that exist within a Windows 'Portable Executable' format file. |
| 46 // Returns 'true' if the buffer matches, and 'false' if the data looks | 85 // Returns 'true' if the buffer matches, and 'false' if the data looks |
| 47 // suspicious. Rather than try to 'map' the buffer to the numerous windows | 86 // suspicious. Rather than try to 'map' the buffer to the numerous windows |
| 48 // structures, we extract the information we need into the courgette::PEInfo | 87 // structures, we extract the information we need into the courgette::PEInfo |
| 49 // structure. | 88 // structure. |
| 50 // | 89 // |
| 51 bool DisassemblerWin32X64::ParseHeader() { | 90 bool DisassemblerWin32X64::ParseHeader() { |
| 52 if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) | 91 if (length() < kOffsetOfFileAddressOfNewExeHeader + 4 /*size*/) |
| 53 return Bad("Too small"); | 92 return Bad("Too small"); |
| 54 | 93 |
| 55 // Have 'MZ' magic for a DOS header? | 94 // Have 'MZ' magic for a DOS header? |
| 56 if (start()[0] != 'M' || start()[1] != 'Z') | 95 if (start()[0] != 'M' || start()[1] != 'Z') |
| 57 return Bad("Not MZ"); | 96 return Bad("Not MZ"); |
| 58 | 97 |
| 59 // offset from DOS header to PE header is stored in DOS header. | 98 // offset from DOS header to PE header is stored in DOS header. |
| 60 uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); | 99 FileOffset file_offset = static_cast<FileOffset>( |
| 100 ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); |
| 61 | 101 |
| 62 if (offset >= length()) | 102 if (file_offset >= length()) |
| 63 return Bad("Bad offset to PE header"); | 103 return Bad("Bad offset to PE header"); |
| 64 | 104 |
| 65 const uint8_t* const pe_header = OffsetToPointer(offset); | 105 const uint8_t* const pe_header = FileOffsetToPointer(file_offset); |
| 66 const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; | 106 const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; |
| 67 if (pe_header <= start() || | 107 if (pe_header <= start() || |
| 68 pe_header >= end() - kMinPEHeaderSize) | 108 pe_header >= end() - kMinPEHeaderSize) |
| 69 return Bad("Bad offset to PE header"); | 109 return Bad("Bad file offset to PE header"); |
| 70 | 110 |
| 71 if (offset % 8 != 0) | 111 if (file_offset % 8 != 0) |
| 72 return Bad("Misaligned PE header"); | 112 return Bad("Misaligned PE header"); |
| 73 | 113 |
| 74 // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. | 114 // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. |
| 75 // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx | 115 // See http://msdn.microsoft.com/en-us/library/ms680336(VS.85).aspx |
| 76 // | 116 // |
| 77 // The first field of the IMAGE_NT_HEADERS is the signature. | 117 // The first field of the IMAGE_NT_HEADERS is the signature. |
| 78 if (!(pe_header[0] == 'P' && | 118 if (!(pe_header[0] == 'P' && |
| 79 pe_header[1] == 'E' && | 119 pe_header[1] == 'E' && |
| 80 pe_header[2] == 0 && | 120 pe_header[2] == 0 && |
| 81 pe_header[3] == 0)) | 121 pe_header[3] == 0)) |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 162 if (!b) { | 202 if (!b) { |
| 163 return Bad("malformed data directory"); | 203 return Bad("malformed data directory"); |
| 164 } | 204 } |
| 165 | 205 |
| 166 // Sections follow the optional header. | 206 // Sections follow the optional header. |
| 167 sections_ = | 207 sections_ = |
| 168 reinterpret_cast<const Section*>(optional_header + | 208 reinterpret_cast<const Section*>(optional_header + |
| 169 size_of_optional_header_); | 209 size_of_optional_header_); |
| 170 size_t detected_length = 0; | 210 size_t detected_length = 0; |
| 171 | 211 |
| 172 for (int i = 0; i < number_of_sections_; ++i) { | 212 for (int i = 0; i < number_of_sections_; ++i) { |
| 173 const Section* section = &sections_[i]; | 213 const Section* section = &sections_[i]; |
| 174 | 214 |
| 175 // TODO(sra): consider using the 'characteristics' field of the section | 215 // TODO(sra): consider using the 'characteristics' field of the section |
| 176 // header to see if the section contains instructions. | 216 // header to see if the section contains instructions. |
| 177 if (memcmp(section->name, ".text", 6) == 0) | 217 if (memcmp(section->name, ".text", 6) == 0) |
| 178 has_text_section_ = true; | 218 has_text_section_ = true; |
| 179 | 219 |
| 180 uint32_t section_end = | 220 uint32_t section_end = |
| 181 section->file_offset_of_raw_data + section->size_of_raw_data; | 221 section->file_offset_of_raw_data + section->size_of_raw_data; |
| 182 if (section_end > detected_length) | 222 if (section_end > detected_length) |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 260 | 300 |
| 261 // Walk through the two-byte entries. | 301 // Walk through the two-byte entries. |
| 262 for (const uint8_t* p = block + 8; p < end_entries; p += 2) { | 302 for (const uint8_t* p = block + 8; p < end_entries; p += 2) { |
| 263 uint16_t entry = ReadU16(p, 0); | 303 uint16_t entry = ReadU16(p, 0); |
| 264 int type = entry >> 12; | 304 int type = entry >> 12; |
| 265 int offset = entry & 0xFFF; | 305 int offset = entry & 0xFFF; |
| 266 | 306 |
| 267 RVA rva = page_rva + offset; | 307 RVA rva = page_rva + offset; |
| 268 // TODO(sebmarchand): Skip the relocs that live outside of the image. See | 308 // TODO(sebmarchand): Skip the relocs that live outside of the image. See |
| 269 // the version of this function in disassembler_win32_x86.cc. | 309 // the version of this function in disassembler_win32_x86.cc. |
| 270 if (type == 10) { // IMAGE_REL_BASED_DIR64 | 310 if (type == 10) { // IMAGE_REL_BASED_DIR64 |
| 271 relocs->push_back(rva); | 311 relocs->push_back(rva); |
| 272 } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE | 312 } else if (type == 0) { // IMAGE_REL_BASED_ABSOLUTE |
| 273 // Ignore, used as padding. | 313 // Ignore, used as padding. |
| 274 } else { | 314 } else { |
| 275 // Does not occur in Windows x64 executables. | 315 // Does not occur in Windows x64 executables. |
| 276 return Bad("unknown type of reloc"); | 316 return Bad("unknown type of reloc"); |
| 277 } | 317 } |
| 278 } | 318 } |
| 279 | 319 |
| 280 block += size; | 320 block += size; |
| 281 } | 321 } |
| 282 | 322 |
| 283 std::sort(relocs->begin(), relocs->end()); | 323 std::sort(relocs->begin(), relocs->end()); |
| 284 DCHECK(relocs->empty() || relocs->back() != kUnassignedRVA); | 324 DCHECK(relocs->empty() || relocs->back() != kUnassignedRVA); |
| 285 | 325 |
| 286 return true; | 326 return true; |
| 287 } | 327 } |
| 288 | 328 |
| 289 const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { | 329 const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { |
| 290 for (int i = 0; i < number_of_sections_; i++) { | 330 for (int i = 0; i < number_of_sections_; ++i) { |
| 291 const Section* section = &sections_[i]; | 331 const Section* section = &sections_[i]; |
| 292 uint32_t offset = rva - section->virtual_address; | 332 if (rva >= section->virtual_address) { |
| 293 if (offset < section->virtual_size) { | 333 FileOffset offset_in_section = rva - section->virtual_address; |
| 294 return section; | 334 if (offset_in_section < section->virtual_size) |
| 335 return section; |
| 295 } | 336 } |
| 296 } | 337 } |
| 297 return NULL; | 338 return nullptr; |
| 298 } | |
| 299 | |
| 300 int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { | |
| 301 const Section* section = RVAToSection(rva); | |
| 302 if (section) { | |
| 303 uint32_t offset = rva - section->virtual_address; | |
| 304 if (offset < section->size_of_raw_data) { | |
| 305 return section->file_offset_of_raw_data + offset; | |
| 306 } else { | |
| 307 return kNoOffset; // In section but not in file (e.g. uninit data). | |
| 308 } | |
| 309 } | |
| 310 | |
| 311 // Small RVA values point into the file header in the loaded image. | |
| 312 // RVA 0 is the module load address which Windows uses as the module handle. | |
| 313 // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the | |
| 314 // DOS header. | |
| 315 if (rva == 0 || rva == 2) | |
| 316 return rva; | |
| 317 | |
| 318 NOTREACHED(); | |
| 319 return kNoOffset; | |
| 320 } | |
| 321 | |
| 322 const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const { | |
| 323 int file_offset = RVAToFileOffset(rva); | |
| 324 if (file_offset == kNoOffset) | |
| 325 return NULL; | |
| 326 else | |
| 327 return OffsetToPointer(file_offset); | |
| 328 } | 339 } |
| 329 | 340 |
| 330 std::string DisassemblerWin32X64::SectionName(const Section* section) { | 341 std::string DisassemblerWin32X64::SectionName(const Section* section) { |
| 331 if (section == NULL) | 342 if (section == nullptr) |
| 332 return "<none>"; | 343 return "<none>"; |
| 333 char name[9]; | 344 char name[9]; |
| 334 memcpy(name, section->name, 8); | 345 memcpy(name, section->name, 8); |
| 335 name[8] = '\0'; // Ensure termination. | 346 name[8] = '\0'; // Ensure termination. |
| 336 return name; | 347 return name; |
| 337 } | 348 } |
| 338 | 349 |
| 339 CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { | 350 CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { |
| 340 // Walk all the bytes in the file, whether or not in a section. | 351 // Walk all the bytes in the file, whether or not in a section. |
| 341 uint32_t file_offset = 0; | 352 FileOffset file_offset = 0; |
| 342 while (file_offset < length()) { | 353 while (file_offset < length()) { |
| 343 const Section* section = FindNextSection(file_offset); | 354 const Section* section = FindNextSection(file_offset); |
| 344 if (section == NULL) { | 355 if (section == nullptr) { |
| 345 // No more sections. There should not be extra stuff following last | 356 // No more sections. There should not be extra stuff following last |
| 346 // section. | 357 // section. |
| 347 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); | 358 // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); |
| 348 break; | 359 break; |
| 349 } | 360 } |
| 350 if (file_offset < section->file_offset_of_raw_data) { | 361 if (file_offset < section->file_offset_of_raw_data) { |
| 351 uint32_t section_start_offset = section->file_offset_of_raw_data; | 362 FileOffset section_start_offset = section->file_offset_of_raw_data; |
| 352 if(!ParseNonSectionFileRegion(file_offset, section_start_offset, | 363 if (!ParseNonSectionFileRegion(file_offset, section_start_offset, |
| 353 program)) | 364 program)) { |
| 354 return false; | 365 return false; |
| 366 } |
| 355 | 367 |
| 356 file_offset = section_start_offset; | 368 file_offset = section_start_offset; |
| 357 } | 369 } |
| 358 uint32_t end = file_offset + section->size_of_raw_data; | 370 FileOffset end = file_offset + section->size_of_raw_data; |
| 359 if (!ParseFileRegion(section, file_offset, end, program)) | 371 if (!ParseFileRegion(section, file_offset, end, program)) |
| 360 return false; | 372 return false; |
| 361 file_offset = end; | 373 file_offset = end; |
| 362 } | 374 } |
| 363 | 375 |
| 364 #if COURGETTE_HISTOGRAM_TARGETS | 376 #if COURGETTE_HISTOGRAM_TARGETS |
| 365 HistogramTargets("abs32 relocs", abs32_target_rvas_); | 377 HistogramTargets("abs32 relocs", abs32_target_rvas_); |
| 366 HistogramTargets("rel32 relocs", rel32_target_rvas_); | 378 HistogramTargets("rel32 relocs", rel32_target_rvas_); |
| 367 #endif | 379 #endif |
| 368 | 380 |
| 369 return true; | 381 return true; |
| 370 } | 382 } |
| 371 | 383 |
| 372 bool DisassemblerWin32X64::ParseAbs32Relocs() { | 384 bool DisassemblerWin32X64::ParseAbs32Relocs() { |
| 373 abs32_locations_.clear(); | 385 abs32_locations_.clear(); |
| 374 if (!ParseRelocs(&abs32_locations_)) | 386 if (!ParseRelocs(&abs32_locations_)) |
| 375 return false; | 387 return false; |
| 376 | 388 |
| 377 #if COURGETTE_HISTOGRAM_TARGETS | 389 #if COURGETTE_HISTOGRAM_TARGETS |
| 378 for (size_t i = 0; i < abs32_locations_.size(); ++i) { | 390 for (size_t i = 0; i < abs32_locations_.size(); ++i) { |
| 379 RVA rva = abs32_locations_[i]; | 391 RVA rva = abs32_locations_[i]; |
| 380 // The 4 bytes at the relocation are a reference to some address. | 392 // The 4 bytes at the relocation are a reference to some address. |
| 381 uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); | 393 uint32_t target_address = Read32LittleEndian(RVAToPointer(rva)); |
| 382 ++abs32_target_rvas_[target_address - image_base()]; | 394 ++abs32_target_rvas_[target_address - image_base()]; |
| 383 } | 395 } |
| 384 #endif | 396 #endif |
| 385 return true; | 397 return true; |
| 386 } | 398 } |
| 387 | 399 |
| 388 void DisassemblerWin32X64::ParseRel32RelocsFromSections() { | 400 void DisassemblerWin32X64::ParseRel32RelocsFromSections() { |
| 389 uint32_t file_offset = 0; | 401 FileOffset file_offset = 0; |
| 390 while (file_offset < length()) { | 402 while (file_offset < length()) { |
| 391 const Section* section = FindNextSection(file_offset); | 403 const Section* section = FindNextSection(file_offset); |
| 392 if (section == NULL) | 404 if (section == nullptr) |
| 393 break; | 405 break; |
| 394 if (file_offset < section->file_offset_of_raw_data) | 406 if (file_offset < section->file_offset_of_raw_data) |
| 395 file_offset = section->file_offset_of_raw_data; | 407 file_offset = section->file_offset_of_raw_data; |
| 396 ParseRel32RelocsFromSection(section); | 408 ParseRel32RelocsFromSection(section); |
| 397 file_offset += section->size_of_raw_data; | 409 file_offset += section->size_of_raw_data; |
| 398 } | 410 } |
| 399 std::sort(rel32_locations_.begin(), rel32_locations_.end()); | 411 std::sort(rel32_locations_.begin(), rel32_locations_.end()); |
| 400 DCHECK(rel32_locations_.empty() || | 412 DCHECK(rel32_locations_.empty() || |
| 401 rel32_locations_.back() != kUnassignedRVA); | 413 rel32_locations_.back() != kUnassignedRVA); |
| 402 | 414 |
| 403 #if COURGETTE_HISTOGRAM_TARGETS | 415 #if COURGETTE_HISTOGRAM_TARGETS |
| 404 VLOG(1) << "abs32_locations_ " << abs32_locations_.size() | 416 VLOG(1) << "abs32_locations_ " << abs32_locations_.size() |
| 405 << "\nrel32_locations_ " << rel32_locations_.size() | 417 << "\nrel32_locations_ " << rel32_locations_.size() |
| 406 << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() | 418 << "\nabs32_target_rvas_ " << abs32_target_rvas_.size() |
| 407 << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); | 419 << "\nrel32_target_rvas_ " << rel32_target_rvas_.size(); |
| 408 | 420 |
| 409 int common = 0; | 421 int common = 0; |
| 410 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); | 422 std::map<RVA, int>::iterator abs32_iter = abs32_target_rvas_.begin(); |
| 411 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); | 423 std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); |
| 412 while (abs32_iter != abs32_target_rvas_.end() && | 424 while (abs32_iter != abs32_target_rvas_.end() && |
| 413 rel32_iter != rel32_target_rvas_.end()) { | 425 rel32_iter != rel32_target_rvas_.end()) { |
| 414 if (abs32_iter->first < rel32_iter->first) | 426 if (abs32_iter->first < rel32_iter->first) { |
| 415 ++abs32_iter; | 427 ++abs32_iter; |
| 416 else if (rel32_iter->first < abs32_iter->first) | 428 } else if (rel32_iter->first < abs32_iter->first) { |
| 417 ++rel32_iter; | 429 ++rel32_iter; |
| 418 else { | 430 } else { |
| 419 ++common; | 431 ++common; |
| 420 ++abs32_iter; | 432 ++abs32_iter; |
| 421 ++rel32_iter; | 433 ++rel32_iter; |
| 422 } | 434 } |
| 423 } | 435 } |
| 424 VLOG(1) << "common " << common; | 436 VLOG(1) << "common " << common; |
| 425 #endif | 437 #endif |
| 426 } | 438 } |
| 427 | 439 |
| 428 void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { | 440 void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { |
| 429 // TODO(sra): use characteristic. | 441 // TODO(sra): use characteristic. |
| 430 bool isCode = strcmp(section->name, ".text") == 0; | 442 bool isCode = strcmp(section->name, ".text") == 0; |
| 431 if (!isCode) | 443 if (!isCode) |
| 432 return; | 444 return; |
| 433 | 445 |
| 434 uint32_t start_file_offset = section->file_offset_of_raw_data; | 446 FileOffset start_file_offset = section->file_offset_of_raw_data; |
| 435 uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; | 447 FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; |
| 436 RVA relocs_start_rva = base_relocation_table().address_; | 448 RVA relocs_start_rva = base_relocation_table().address_; |
| 437 | 449 |
| 438 const uint8_t* start_pointer = OffsetToPointer(start_file_offset); | 450 const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); |
| 439 const uint8_t* end_pointer = OffsetToPointer(end_file_offset); | 451 const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); |
| 440 | 452 |
| 441 RVA start_rva = FileOffsetToRVA(start_file_offset); | 453 RVA start_rva = FileOffsetToRVA(start_file_offset); |
| 442 RVA end_rva = start_rva + section->virtual_size; | 454 RVA end_rva = start_rva + section->virtual_size; |
| 443 | 455 |
| 444 // Quick way to convert from Pointer to RVA within a single Section is to | 456 // Quick way to convert from Pointer to RVA within a single Section is to |
| 445 // subtract 'pointer_to_rva'. | 457 // subtract |pointer_to_rva|. |
| 446 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; | 458 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; |
| 447 | 459 |
| 448 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | 460 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); |
| 449 | 461 |
| 450 // Find the rel32 relocations. | 462 // Find the rel32 relocations. |
| 451 const uint8_t* p = start_pointer; | 463 const uint8_t* p = start_pointer; |
| 452 while (p < end_pointer) { | 464 while (p < end_pointer) { |
| 453 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); | 465 RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva); |
| 454 if (current_rva == relocs_start_rva) { | 466 if (current_rva == relocs_start_rva) { |
| 455 uint32_t relocs_size = base_relocation_table().size_; | 467 uint32_t relocs_size = base_relocation_table().size_; |
| 456 if (relocs_size) { | 468 if (relocs_size) { |
| 457 p += relocs_size; | 469 p += relocs_size; |
| 458 continue; | 470 continue; |
| 459 } | 471 } |
| 460 } | 472 } |
| 461 | 473 |
| 462 //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) | |
| 463 // ++abs32_pos; | |
| 464 | |
| 465 // Heuristic discovery of rel32 locations in instruction stream: are the | 474 // Heuristic discovery of rel32 locations in instruction stream: are the |
| 466 // next few bytes the start of an instruction containing a rel32 | 475 // next few bytes the start of an instruction containing a rel32 |
| 467 // addressing mode? | 476 // addressing mode? |
| 468 const uint8_t* rel32 = NULL; | 477 const uint8_t* rel32 = nullptr; |
| 469 bool is_rip_relative = false; | 478 bool is_rip_relative = false; |
| 470 | 479 |
| 471 if (p + 5 <= end_pointer) { | 480 if (p + 5 <= end_pointer) { |
| 472 if (*p == 0xE8 || *p == 0xE9) // jmp rel32 and call rel32 | 481 if (*p == 0xE8 || *p == 0xE9) // jmp rel32 and call rel32 |
| 473 rel32 = p + 1; | 482 rel32 = p + 1; |
| 474 } | 483 } |
| 475 if (p + 6 <= end_pointer) { | 484 if (p + 6 <= end_pointer) { |
| 476 if (*p == 0x0F && (*(p + 1) & 0xF0) == 0x80) { // Jcc long form | 485 if (*p == 0x0F && (*(p + 1) & 0xF0) == 0x80) { // Jcc long form |
| 477 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely | 486 if (p[1] != 0x8A && p[1] != 0x8B) // JPE/JPO unlikely |
| 478 rel32 = p + 2; | 487 rel32 = p + 2; |
| (...skipping 30 matching lines...) Expand all Loading... |
| 509 // Beginning of abs32 reloc is before end of rel32 reloc so they | 518 // Beginning of abs32 reloc is before end of rel32 reloc so they |
| 510 // overlap. Skip four bytes past the abs32 reloc. | 519 // overlap. Skip four bytes past the abs32 reloc. |
| 511 p += (*abs32_pos + 4) - current_rva; | 520 p += (*abs32_pos + 4) - current_rva; |
| 512 continue; | 521 continue; |
| 513 } | 522 } |
| 514 } | 523 } |
| 515 | 524 |
| 516 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); | 525 RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); |
| 517 // To be valid, rel32 target must be within image, and within this | 526 // To be valid, rel32 target must be within image, and within this |
| 518 // section. | 527 // section. |
| 519 if (IsValidRVA(target_rva) && | 528 if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA. |
| 520 (is_rip_relative || | 529 (is_rip_relative || |
| 521 (start_rva <= target_rva && target_rva < end_rva))) { | 530 (start_rva <= target_rva && target_rva < end_rva))) { |
| 522 rel32_locations_.push_back(rel32_rva); | 531 rel32_locations_.push_back(rel32_rva); |
| 523 #if COURGETTE_HISTOGRAM_TARGETS | 532 #if COURGETTE_HISTOGRAM_TARGETS |
| 524 ++rel32_target_rvas_[target_rva]; | 533 ++rel32_target_rvas_[target_rva]; |
| 525 #endif | 534 #endif |
| 526 p = rel32 + 4; | 535 p = rel32 + 4; |
| 527 continue; | 536 continue; |
| 528 } | 537 } |
| 529 } | 538 } |
| 530 p += 1; | 539 p += 1; |
| 531 } | 540 } |
| 532 } | 541 } |
| 533 | 542 |
| 534 CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( | 543 CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( |
| 535 uint32_t start_file_offset, | 544 FileOffset start_file_offset, |
| 536 uint32_t end_file_offset, | 545 FileOffset end_file_offset, |
| 537 AssemblyProgram* program) { | 546 AssemblyProgram* program) { |
| 538 if (incomplete_disassembly_) | 547 if (incomplete_disassembly_) |
| 539 return true; | 548 return true; |
| 540 | 549 |
| 541 if (end_file_offset > start_file_offset) { | 550 if (end_file_offset > start_file_offset) { |
| 542 if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), | 551 if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), |
| 543 end_file_offset - start_file_offset)) { | 552 end_file_offset - start_file_offset)) { |
| 544 return false; | 553 return false; |
| 545 } | 554 } |
| 546 } | 555 } |
| 547 | 556 |
| 548 return true; | 557 return true; |
| 549 } | 558 } |
| 550 | 559 |
| 551 CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, | 560 CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, |
| 552 uint32_t start_file_offset, | 561 FileOffset start_file_offset, |
| 553 uint32_t end_file_offset, | 562 FileOffset end_file_offset, |
| 554 AssemblyProgram* program) { | 563 AssemblyProgram* program) { |
| 555 RVA relocs_start_rva = base_relocation_table().address_; | 564 RVA relocs_start_rva = base_relocation_table().address_; |
| 556 | 565 |
| 557 const uint8_t* start_pointer = OffsetToPointer(start_file_offset); | 566 const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); |
| 558 const uint8_t* end_pointer = OffsetToPointer(end_file_offset); | 567 const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); |
| 559 | 568 |
| 560 RVA start_rva = FileOffsetToRVA(start_file_offset); | 569 RVA start_rva = FileOffsetToRVA(start_file_offset); |
| 561 RVA end_rva = start_rva + section->virtual_size; | 570 RVA end_rva = start_rva + section->virtual_size; |
| 562 | 571 |
| 563 // Quick way to convert from Pointer to RVA within a single Section is to | 572 // Quick way to convert from Pointer to RVA within a single Section is to |
| 564 // subtract 'pointer_to_rva'. | 573 // subtract 'pointer_to_rva'. |
| 565 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; | 574 const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; |
| 566 | 575 |
| 567 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); | 576 std::vector<RVA>::iterator rel32_pos = rel32_locations_.begin(); |
| 568 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); | 577 std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 657 p != h.rend(); | 666 p != h.rend(); |
| 658 ++p) { | 667 ++p) { |
| 659 ++index; | 668 ++index; |
| 660 if (index <= kFirstN || p->first <= 3) { | 669 if (index <= kFirstN || p->first <= 3) { |
| 661 if (someSkipped) { | 670 if (someSkipped) { |
| 662 std::cout << "..." << std::endl; | 671 std::cout << "..." << std::endl; |
| 663 } | 672 } |
| 664 size_t count = p->second.size(); | 673 size_t count = p->second.size(); |
| 665 std::cout << std::dec << p->first << ": " << count; | 674 std::cout << std::dec << p->first << ": " << count; |
| 666 if (count <= 2) { | 675 if (count <= 2) { |
| 667 for (size_t i = 0; i < count; ++i) | 676 for (size_t i = 0; i < count; ++i) |
| 668 std::cout << " " << DescribeRVA(p->second[i]); | 677 std::cout << " " << DescribeRVA(p->second[i]); |
| 669 } | 678 } |
| 670 std::cout << std::endl; | 679 std::cout << std::endl; |
| 671 someSkipped = false; | 680 someSkipped = false; |
| 672 } else { | 681 } else { |
| 673 someSkipped = true; | 682 someSkipped = true; |
| 674 } | 683 } |
| 675 } | 684 } |
| 676 } | 685 } |
| 677 #endif // COURGETTE_HISTOGRAM_TARGETS | 686 #endif // COURGETTE_HISTOGRAM_TARGETS |
| 678 | 687 |
| 679 | |
| 680 // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except | 688 // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except |
| 681 // that during development I'm finding I need to call it when compiled in | 689 // that during development I'm finding I need to call it when compiled in |
| 682 // Release mode. Hence: | 690 // Release mode. Hence: |
| 683 // TODO(sra): make this compile only for debug mode. | 691 // TODO(sra): make this compile only for debug mode. |
| 684 std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { | 692 std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { |
| 685 const Section* section = RVAToSection(rva); | 693 const Section* section = RVAToSection(rva); |
| 686 std::ostringstream s; | 694 std::ostringstream s; |
| 687 s << std::hex << rva; | 695 s << std::hex << rva; |
| 688 if (section) { | 696 if (section) { |
| 689 s << " ("; | 697 s << " ("; |
| 690 s << SectionName(section) << "+" | 698 s << SectionName(section) << "+" |
| 691 << std::hex << (rva - section->virtual_address) | 699 << std::hex << (rva - section->virtual_address) |
| 692 << ")"; | 700 << ")"; |
| 693 } | 701 } |
| 694 return s.str(); | 702 return s.str(); |
| 695 } | 703 } |
| 696 | 704 |
| 697 const Section* DisassemblerWin32X64::FindNextSection( | 705 const Section* DisassemblerWin32X64::FindNextSection( |
| 698 uint32_t fileOffset) const { | 706 FileOffset file_offset) const { |
| 699 const Section* best = 0; | 707 const Section* best = 0; |
| 700 for (int i = 0; i < number_of_sections_; i++) { | 708 for (int i = 0; i < number_of_sections_; ++i) { |
| 701 const Section* section = &sections_[i]; | 709 const Section* section = &sections_[i]; |
| 702 if (section->size_of_raw_data > 0) { // i.e. has data in file. | 710 if (section->size_of_raw_data > 0) { // i.e. has data in file. |
| 703 if (fileOffset <= section->file_offset_of_raw_data) { | 711 if (file_offset <= section->file_offset_of_raw_data) { |
| 704 if (best == 0 || | 712 if (best == 0 || |
| 705 section->file_offset_of_raw_data < best->file_offset_of_raw_data) { | 713 section->file_offset_of_raw_data < best->file_offset_of_raw_data) { |
| 706 best = section; | 714 best = section; |
| 707 } | 715 } |
| 708 } | 716 } |
| 709 } | 717 } |
| 710 } | 718 } |
| 711 return best; | 719 return best; |
| 712 } | 720 } |
| 713 | 721 |
| 714 RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const { | |
| 715 for (int i = 0; i < number_of_sections_; i++) { | |
| 716 const Section* section = &sections_[i]; | |
| 717 uint32_t offset = file_offset - section->file_offset_of_raw_data; | |
| 718 if (offset < section->size_of_raw_data) { | |
| 719 return section->virtual_address + offset; | |
| 720 } | |
| 721 } | |
| 722 return 0; | |
| 723 } | |
| 724 | |
| 725 bool DisassemblerWin32X64::ReadDataDirectory( | 722 bool DisassemblerWin32X64::ReadDataDirectory( |
| 726 int index, | 723 int index, |
| 727 ImageDataDirectory* directory) { | 724 ImageDataDirectory* directory) { |
| 728 | 725 |
| 729 if (index < number_of_data_directories_) { | 726 if (index < number_of_data_directories_) { |
| 730 size_t offset = index * 8 + offset_of_data_directories_; | 727 FileOffset file_offset = index * 8 + offset_of_data_directories_; |
| 731 if (offset >= size_of_optional_header_) | 728 if (file_offset >= size_of_optional_header_) |
| 732 return Bad("number of data directories inconsistent"); | 729 return Bad("number of data directories inconsistent"); |
| 733 const uint8_t* data_directory = optional_header_ + offset; | 730 const uint8_t* data_directory = optional_header_ + file_offset; |
| 734 if (data_directory < start() || | 731 if (data_directory < start() || |
| 735 data_directory + 8 >= end()) | 732 data_directory + 8 >= end()) |
| 736 return Bad("data directory outside image"); | 733 return Bad("data directory outside image"); |
| 737 RVA rva = ReadU32(data_directory, 0); | 734 RVA rva = ReadU32(data_directory, 0); |
| 738 size_t size = ReadU32(data_directory, 4); | 735 size_t size = ReadU32(data_directory, 4); |
| 739 if (size > size_of_image_) | 736 if (size > size_of_image_) |
| 740 return Bad("data directory size too big"); | 737 return Bad("data directory size too big"); |
| 741 | 738 |
| 742 // TODO(sra): validate RVA. | 739 // TODO(sra): validate RVA. |
| 743 directory->address_ = rva; | 740 directory->address_ = rva; |
| 744 directory->size_ = static_cast<uint32_t>(size); | 741 directory->size_ = static_cast<uint32_t>(size); |
| 745 return true; | 742 return true; |
| 746 } else { | 743 } else { |
| 747 directory->address_ = 0; | 744 directory->address_ = 0; |
| 748 directory->size_ = 0; | 745 directory->size_ = 0; |
| 749 return true; | 746 return true; |
| 750 } | 747 } |
| 751 } | 748 } |
| 752 | 749 |
| 753 } // namespace courgette | 750 } // namespace courgette |
| OLD | NEW |