Chromium Code Reviews| Index: courgette/disassembler_win32_x64.cc |
| diff --git a/courgette/disassembler_win32_x64.cc b/courgette/disassembler_win32_x64.cc |
| index 74b0fe48a7fe7800abe9ffebcf1637daabc292db..a6a90118867c4dae2058c6204b3b396a30119077 100644 |
| --- a/courgette/disassembler_win32_x64.cc |
| +++ b/courgette/disassembler_win32_x64.cc |
| @@ -8,28 +8,25 @@ |
| #include <stdint.h> |
| #include <algorithm> |
| -#include <string> |
| -#include <vector> |
| +#include <iostream> |
| #include "base/logging.h" |
| #include "base/numerics/safe_conversions.h" |
| - |
| #include "courgette/assembly_program.h" |
| #include "courgette/courgette.h" |
| -#include "courgette/encoded_program.h" |
| namespace courgette { |
| DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) |
| - : Disassembler(start, length), |
| + : Disassembler(start, length), |
| incomplete_disassembly_(false), |
| is_PE32_plus_(false), |
| - optional_header_(NULL), |
| + optional_header_(nullptr), |
| size_of_optional_header_(0), |
| offset_of_data_directories_(0), |
| machine_type_(0), |
| number_of_sections_(0), |
| - sections_(NULL), |
| + sections_(nullptr), |
| has_text_section_(false), |
| size_of_code_(0), |
| size_of_initialized_data_(0), |
| @@ -41,6 +38,45 @@ DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length) |
| number_of_data_directories_(0) { |
| } |
| +FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { |
| + const Section* section = RVAToSection(rva); |
| + if (section != nullptr) { |
| + uint32_t offset_in_section = rva - section->virtual_address; |
| + // Need this extra check, since an |rva| may be valid for a section, but is |
| + // non-existent in an image (e.g. uninit data). |
| + if (offset_in_section >= section->size_of_raw_data) |
| + return kNoFileOffset; |
| + |
| + return static_cast<FileOffset>( |
| + section->file_offset_of_raw_data + offset_in_section); |
| + } |
| + |
| + // Small RVA values point into the file header in the loaded image. |
| + // RVA 0 is the module load address which Windows uses as the module handle. |
| + // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the |
| + // DOS header. |
| + if (rva == 0 || rva == 2) |
| + return static_cast<FileOffset>(rva); |
| + |
| + NOTREACHED(); |
| + return kNoFileOffset; |
| +} |
| + |
| +RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const { |
| + for (int i = 0; i < number_of_sections_; ++i) { |
| + const Section* section = §ions_[i]; |
| + if (file_offset >= section->file_offset_of_raw_data) { |
| + uint32_t offset_in_section = |
| + file_offset - section->file_offset_of_raw_data; |
| + if (offset_in_section < section->size_of_raw_data) |
| + return static_cast<RVA>(section->virtual_address + offset_in_section); |
| + } |
| + } |
| + |
| + NOTREACHED(); |
| + return kNoRVA; |
| +} |
| + |
| // ParseHeader attempts to match up the buffer with the Windows data |
| // structures that exist within a Windows 'Portable Executable' format file. |
| // Returns 'true' if the buffer matches, and 'false' if the data looks |
| @@ -57,18 +93,19 @@ bool DisassemblerWin32X64::ParseHeader() { |
| return Bad("Not MZ"); |
| // offset from DOS header to PE header is stored in DOS header. |
| - uint32_t offset = ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader); |
| + FileOffset file_offset = static_cast<FileOffset>( |
| + ReadU32(start(), kOffsetOfFileAddressOfNewExeHeader)); |
| - if (offset >= length()) |
| + if (file_offset >= length()) |
| return Bad("Bad offset to PE header"); |
| - const uint8_t* const pe_header = OffsetToPointer(offset); |
| + const uint8_t* const pe_header = FileOffsetToPointer(file_offset); |
| const size_t kMinPEHeaderSize = 4 /*signature*/ + kSizeOfCoffHeader; |
| if (pe_header <= start() || |
| pe_header >= end() - kMinPEHeaderSize) |
| - return Bad("Bad offset to PE header"); |
| + return Bad("Bad file offset to PE header"); |
| - if (offset % 8 != 0) |
| + if (file_offset % 8 != 0) |
| return Bad("Misaligned PE header"); |
| // The 'PE' header is an IMAGE_NT_HEADERS structure as defined in WINNT.H. |
| @@ -169,7 +206,7 @@ bool DisassemblerWin32X64::ParseHeader() { |
| size_of_optional_header_); |
| size_t detected_length = 0; |
| - for (int i = 0; i < number_of_sections_; ++i) { |
| + for (int i = 0; i < number_of_sections_; ++i) { |
| const Section* section = §ions_[i]; |
| // TODO(sra): consider using the 'characteristics' field of the section |
| @@ -287,48 +324,19 @@ bool DisassemblerWin32X64::ParseRelocs(std::vector<RVA> *relocs) { |
| } |
| const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const { |
| - for (int i = 0; i < number_of_sections_; i++) { |
| + for (int i = 0; i < number_of_sections_; ++i) { |
| const Section* section = §ions_[i]; |
| - uint32_t offset = rva - section->virtual_address; |
| - if (offset < section->virtual_size) { |
| - return section; |
| + if (rva >= section->virtual_address) { |
| + uint32_t offset_in_section = rva - section->virtual_address; |
| + if (offset_in_section < section->virtual_size) |
| + return section; |
| } |
| } |
| - return NULL; |
| -} |
| - |
| -int DisassemblerWin32X64::RVAToFileOffset(RVA rva) const { |
| - const Section* section = RVAToSection(rva); |
| - if (section) { |
| - uint32_t offset = rva - section->virtual_address; |
| - if (offset < section->size_of_raw_data) { |
| - return section->file_offset_of_raw_data + offset; |
| - } else { |
| - return kNoOffset; // In section but not in file (e.g. uninit data). |
| - } |
| - } |
| - |
| - // Small RVA values point into the file header in the loaded image. |
| - // RVA 0 is the module load address which Windows uses as the module handle. |
| - // RVA 2 sometimes occurs, I'm not sure what it is, but it would map into the |
| - // DOS header. |
| - if (rva == 0 || rva == 2) |
| - return rva; |
| - |
| - NOTREACHED(); |
| - return kNoOffset; |
| -} |
| - |
| -const uint8_t* DisassemblerWin32X64::RVAToPointer(RVA rva) const { |
| - int file_offset = RVAToFileOffset(rva); |
| - if (file_offset == kNoOffset) |
| - return NULL; |
| - else |
| - return OffsetToPointer(file_offset); |
| + return nullptr; |
| } |
| std::string DisassemblerWin32X64::SectionName(const Section* section) { |
| - if (section == NULL) |
| + if (section == nullptr) |
| return "<none>"; |
| char name[9]; |
| memcpy(name, section->name, 8); |
| @@ -338,24 +346,25 @@ std::string DisassemblerWin32X64::SectionName(const Section* section) { |
| CheckBool DisassemblerWin32X64::ParseFile(AssemblyProgram* program) { |
| // Walk all the bytes in the file, whether or not in a section. |
| - uint32_t file_offset = 0; |
| + FileOffset file_offset = 0; |
| while (file_offset < length()) { |
| const Section* section = FindNextSection(file_offset); |
| - if (section == NULL) { |
| - // No more sections. There should not be extra stuff following last |
| + if (section == nullptr) { |
| + // No more sections. There should not be extra stuff following last |
| // section. |
| // ParseNonSectionFileRegion(file_offset, pe_info().length(), program); |
| break; |
| } |
| if (file_offset < section->file_offset_of_raw_data) { |
| - uint32_t section_start_offset = section->file_offset_of_raw_data; |
| + FileOffset section_start_offset = section->file_offset_of_raw_data; |
| if(!ParseNonSectionFileRegion(file_offset, section_start_offset, |
| - program)) |
| + program)) { |
| return false; |
| + } |
| file_offset = section_start_offset; |
| } |
| - uint32_t end = file_offset + section->size_of_raw_data; |
| + FileOffset end = file_offset + section->size_of_raw_data; |
| if (!ParseFileRegion(section, file_offset, end, program)) |
| return false; |
| file_offset = end; |
| @@ -389,7 +398,7 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() { |
| uint32_t file_offset = 0; |
| while (file_offset < length()) { |
| const Section* section = FindNextSection(file_offset); |
| - if (section == NULL) |
| + if (section == nullptr) |
| break; |
| if (file_offset < section->file_offset_of_raw_data) |
| file_offset = section->file_offset_of_raw_data; |
| @@ -411,11 +420,11 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSections() { |
| std::map<RVA, int>::iterator rel32_iter = rel32_target_rvas_.begin(); |
| while (abs32_iter != abs32_target_rvas_.end() && |
| rel32_iter != rel32_target_rvas_.end()) { |
| - if (abs32_iter->first < rel32_iter->first) |
| + if (abs32_iter->first < rel32_iter->first) { |
| ++abs32_iter; |
| - else if (rel32_iter->first < abs32_iter->first) |
| + } else if (rel32_iter->first < abs32_iter->first) { |
| ++rel32_iter; |
| - else { |
| + } else { |
| ++common; |
| ++abs32_iter; |
| ++rel32_iter; |
| @@ -431,18 +440,18 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { |
| if (!isCode) |
| return; |
| - uint32_t start_file_offset = section->file_offset_of_raw_data; |
| - uint32_t end_file_offset = start_file_offset + section->size_of_raw_data; |
| + FileOffset start_file_offset = section->file_offset_of_raw_data; |
| + FileOffset end_file_offset = start_file_offset + section->size_of_raw_data; |
| RVA relocs_start_rva = base_relocation_table().address_; |
| - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); |
| - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); |
| + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); |
| + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); |
| RVA start_rva = FileOffsetToRVA(start_file_offset); |
| RVA end_rva = start_rva + section->virtual_size; |
| // Quick way to convert from Pointer to RVA within a single Section is to |
| - // subtract 'pointer_to_rva'. |
| + // subtract |pointer_to_rva|. |
| const uint8_t* const adjust_pointer_to_rva = start_pointer - start_rva; |
| std::vector<RVA>::iterator abs32_pos = abs32_locations_.begin(); |
| @@ -459,13 +468,10 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { |
| } |
| } |
| - //while (abs32_pos != abs32_locations_.end() && *abs32_pos < current_rva) |
| - // ++abs32_pos; |
| - |
| // Heuristic discovery of rel32 locations in instruction stream: are the |
| // next few bytes the start of an instruction containing a rel32 |
| // addressing mode? |
| - const uint8_t* rel32 = NULL; |
| + const uint8_t* rel32 = nullptr; |
| bool is_rip_relative = false; |
| if (p + 5 <= end_pointer) { |
| @@ -516,9 +522,9 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { |
| RVA target_rva = rel32_rva + 4 + Read32LittleEndian(rel32); |
| // To be valid, rel32 target must be within image, and within this |
| // section. |
| - if (IsValidRVA(target_rva) && |
| + if (target_rva < size_of_image_ && // Subsumes rva != kUnassignedRVA. |
| (is_rip_relative || |
|
Will Harris
2016/03/08 19:09:07
is git cl format okay with this indenting on line
huangs
2016/03/09 17:53:29
"git cl format" recommends single relative indent.
|
| - (start_rva <= target_rva && target_rva < end_rva))) { |
| + (start_rva <= target_rva && target_rva < end_rva))) { |
| rel32_locations_.push_back(rel32_rva); |
| #if COURGETTE_HISTOGRAM_TARGETS |
| ++rel32_target_rvas_[target_rva]; |
| @@ -532,14 +538,14 @@ void DisassemblerWin32X64::ParseRel32RelocsFromSection(const Section* section) { |
| } |
| CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( |
| - uint32_t start_file_offset, |
| - uint32_t end_file_offset, |
| + FileOffset start_file_offset, |
| + FileOffset end_file_offset, |
| AssemblyProgram* program) { |
| if (incomplete_disassembly_) |
| return true; |
| if (end_file_offset > start_file_offset) { |
| - if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), |
| + if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset), |
| end_file_offset - start_file_offset)) { |
| return false; |
| } |
| @@ -549,13 +555,13 @@ CheckBool DisassemblerWin32X64::ParseNonSectionFileRegion( |
| } |
| CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section, |
| - uint32_t start_file_offset, |
| - uint32_t end_file_offset, |
| + FileOffset start_file_offset, |
| + FileOffset end_file_offset, |
| AssemblyProgram* program) { |
| RVA relocs_start_rva = base_relocation_table().address_; |
| - const uint8_t* start_pointer = OffsetToPointer(start_file_offset); |
| - const uint8_t* end_pointer = OffsetToPointer(end_file_offset); |
| + const uint8_t* start_pointer = FileOffsetToPointer(start_file_offset); |
| + const uint8_t* end_pointer = FileOffsetToPointer(end_file_offset); |
| RVA start_rva = FileOffsetToRVA(start_file_offset); |
| RVA end_rva = start_rva + section->virtual_size; |
| @@ -664,7 +670,7 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind, |
| size_t count = p->second.size(); |
| std::cout << std::dec << p->first << ": " << count; |
| if (count <= 2) { |
| - for (size_t i = 0; i < count; ++i) |
| + for (size_t i = 0; i < count; ++i) |
| std::cout << " " << DescribeRVA(p->second[i]); |
| } |
| std::cout << std::endl; |
| @@ -676,7 +682,6 @@ void DisassemblerWin32X64::HistogramTargets(const char* kind, |
| } |
| #endif // COURGETTE_HISTOGRAM_TARGETS |
| - |
| // DescribeRVA is for debugging only. I would put it under #ifdef DEBUG except |
| // that during development I'm finding I need to call it when compiled in |
| // Release mode. Hence: |
| @@ -695,12 +700,12 @@ std::string DisassemblerWin32X64::DescribeRVA(RVA rva) const { |
| } |
| const Section* DisassemblerWin32X64::FindNextSection( |
| - uint32_t fileOffset) const { |
| + FileOffset file_offset) const { |
| const Section* best = 0; |
| - for (int i = 0; i < number_of_sections_; i++) { |
| + for (int i = 0; i < number_of_sections_; ++i) { |
| const Section* section = §ions_[i]; |
| if (section->size_of_raw_data > 0) { // i.e. has data in file. |
| - if (fileOffset <= section->file_offset_of_raw_data) { |
| + if (file_offset <= section->file_offset_of_raw_data) { |
| if (best == 0 || |
| section->file_offset_of_raw_data < best->file_offset_of_raw_data) { |
| best = section; |
| @@ -711,26 +716,15 @@ const Section* DisassemblerWin32X64::FindNextSection( |
| return best; |
| } |
| -RVA DisassemblerWin32X64::FileOffsetToRVA(uint32_t file_offset) const { |
| - for (int i = 0; i < number_of_sections_; i++) { |
| - const Section* section = §ions_[i]; |
| - uint32_t offset = file_offset - section->file_offset_of_raw_data; |
| - if (offset < section->size_of_raw_data) { |
| - return section->virtual_address + offset; |
| - } |
| - } |
| - return 0; |
| -} |
| - |
| bool DisassemblerWin32X64::ReadDataDirectory( |
| int index, |
| ImageDataDirectory* directory) { |
| if (index < number_of_data_directories_) { |
| - size_t offset = index * 8 + offset_of_data_directories_; |
| - if (offset >= size_of_optional_header_) |
| + FileOffset file_offset = index * 8 + offset_of_data_directories_; |
| + if (file_offset >= size_of_optional_header_) |
| return Bad("number of data directories inconsistent"); |
| - const uint8_t* data_directory = optional_header_ + offset; |
| + const uint8_t* data_directory = optional_header_ + file_offset; |
| if (data_directory < start() || |
| data_directory + 8 >= end()) |
| return Bad("data directory outside image"); |