OLD | NEW |
| (Empty) |
1 // -*- mode: C++ -*- | |
2 | |
3 // Copyright (c) 2010 Google Inc. All Rights Reserved. | |
4 // | |
5 // Redistribution and use in source and binary forms, with or without | |
6 // modification, are permitted provided that the following conditions are | |
7 // met: | |
8 // | |
9 // * Redistributions of source code must retain the above copyright | |
10 // notice, this list of conditions and the following disclaimer. | |
11 // * Redistributions in binary form must reproduce the above | |
12 // copyright notice, this list of conditions and the following disclaimer | |
13 // in the documentation and/or other materials provided with the | |
14 // distribution. | |
15 // * Neither the name of Google Inc. nor the names of its | |
16 // contributors may be used to endorse or promote products derived from | |
17 // this software without specific prior written permission. | |
18 // | |
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | |
31 #ifndef COMMON_DWARF_BYTEREADER_H__ | |
32 #define COMMON_DWARF_BYTEREADER_H__ | |
33 | |
34 #include <string> | |
35 #include "common/dwarf/types.h" | |
36 #include "common/dwarf/dwarf2enums.h" | |
37 | |
38 namespace dwarf2reader { | |
39 | |
40 // We can't use the obvious names LITTLE_ENDIAN and BIG_ENDIAN | |
41 // because those names conflict with macros. | |
42 enum Endianness { | |
43 ENDIANNESS_BIG, | |
44 ENDIANNESS_LITTLE | |
45 }; | |
46 | |
47 // A ByteReader knows how to read single- and multi-byte values of | |
48 // various endiannesses, sizes, and encodings, as used in DWARF | |
49 // debugging information and Linux C++ exception handling data. | |
50 class ByteReader { | |
51 public: | |
52 // Construct a ByteReader capable of reading one-, two-, four-, and | |
53 // eight-byte values according to ENDIANNESS, absolute machine-sized | |
54 // addresses, DWARF-style "initial length" values, signed and | |
55 // unsigned LEB128 numbers, and Linux C++ exception handling data's | |
56 // encoded pointers. | |
57 explicit ByteReader(enum Endianness endianness); | |
58 virtual ~ByteReader(); | |
59 | |
60 // Read a single byte from BUFFER and return it as an unsigned 8 bit | |
61 // number. | |
62 uint8 ReadOneByte(const char* buffer) const; | |
63 | |
64 // Read two bytes from BUFFER and return them as an unsigned 16 bit | |
65 // number, using this ByteReader's endianness. | |
66 uint16 ReadTwoBytes(const char* buffer) const; | |
67 | |
68 // Read four bytes from BUFFER and return them as an unsigned 32 bit | |
69 // number, using this ByteReader's endianness. This function returns | |
70 // a uint64 so that it is compatible with ReadAddress and | |
71 // ReadOffset. The number it returns will never be outside the range | |
72 // of an unsigned 32 bit integer. | |
73 uint64 ReadFourBytes(const char* buffer) const; | |
74 | |
75 // Read eight bytes from BUFFER and return them as an unsigned 64 | |
76 // bit number, using this ByteReader's endianness. | |
77 uint64 ReadEightBytes(const char* buffer) const; | |
78 | |
79 // Read an unsigned LEB128 (Little Endian Base 128) number from | |
80 // BUFFER and return it as an unsigned 64 bit integer. Set LEN to | |
81 // the number of bytes read. | |
82 // | |
83 // The unsigned LEB128 representation of an integer N is a variable | |
84 // number of bytes: | |
85 // | |
86 // - If N is between 0 and 0x7f, then its unsigned LEB128 | |
87 // representation is a single byte whose value is N. | |
88 // | |
89 // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) | | |
90 // 0x80, followed by the unsigned LEB128 representation of N / | |
91 // 128, rounded towards negative infinity. | |
92 // | |
93 // In other words, we break N into groups of seven bits, put | |
94 // them in little-endian order, and then write them as eight-bit | |
95 // bytes with the high bit on all but the last. | |
96 uint64 ReadUnsignedLEB128(const char* buffer, size_t* len) const; | |
97 | |
98 // Read a signed LEB128 number from BUFFER and return it as a | |
99 // signed 64 bit integer. Set LEN to the number of bytes read. | |
100 // | |
101 // The signed LEB128 representation of an integer N is a variable | |
102 // number of bytes: | |
103 // | |
104 // - If N is between -0x40 and 0x3f, then its signed LEB128 | |
105 // representation is a single byte whose value is N in two's | |
106 // complement. | |
107 // | |
108 // - Otherwise, its signed LEB128 representation is (N & 0x7f) | | |
109 // 0x80, followed by the signed LEB128 representation of N / 128, | |
110 // rounded towards negative infinity. | |
111 // | |
112 // In other words, we break N into groups of seven bits, put | |
113 // them in little-endian order, and then write them as eight-bit | |
114 // bytes with the high bit on all but the last. | |
115 int64 ReadSignedLEB128(const char* buffer, size_t* len) const; | |
116 | |
117 // Indicate that addresses on this architecture are SIZE bytes long. SIZE | |
118 // must be either 4 or 8. (DWARF allows addresses to be any number of | |
119 // bytes in length from 1 to 255, but we only support 32- and 64-bit | |
120 // addresses at the moment.) You must call this before using the | |
121 // ReadAddress member function. | |
122 // | |
123 // For data in a .debug_info section, or something that .debug_info | |
124 // refers to like line number or macro data, the compilation unit | |
125 // header's address_size field indicates the address size to use. Call | |
126 // frame information doesn't indicate its address size (a shortcoming of | |
127 // the spec); you must supply the appropriate size based on the | |
128 // architecture of the target machine. | |
129 void SetAddressSize(uint8 size); | |
130 | |
131 // Return the current address size, in bytes. This is either 4, | |
132 // indicating 32-bit addresses, or 8, indicating 64-bit addresses. | |
133 uint8 AddressSize() const { return address_size_; } | |
134 | |
135 // Read an address from BUFFER and return it as an unsigned 64 bit | |
136 // integer, respecting this ByteReader's endianness and address size. You | |
137 // must call SetAddressSize before calling this function. | |
138 uint64 ReadAddress(const char* buffer) const; | |
139 | |
140 // DWARF actually defines two slightly different formats: 32-bit DWARF | |
141 // and 64-bit DWARF. This is *not* related to the size of registers or | |
142 // addresses on the target machine; it refers only to the size of section | |
143 // offsets and data lengths appearing in the DWARF data. One only needs | |
144 // 64-bit DWARF when the debugging data itself is larger than 4GiB. | |
145 // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the | |
146 // debugging data itself is very large. | |
147 // | |
148 // DWARF information identifies itself as 32-bit or 64-bit DWARF: each | |
149 // compilation unit and call frame information entry begins with an | |
150 // "initial length" field, which, in addition to giving the length of the | |
151 // data, also indicates the size of section offsets and lengths appearing | |
152 // in that data. The ReadInitialLength member function, below, reads an | |
153 // initial length and sets the ByteReader's offset size as a side effect. | |
154 // Thus, in the normal process of reading DWARF data, the appropriate | |
155 // offset size is set automatically. So, you should only need to call | |
156 // SetOffsetSize if you are using the same ByteReader to jump from the | |
157 // midst of one block of DWARF data into another. | |
158 | |
159 // Read a DWARF "initial length" field from START, and return it as | |
160 // an unsigned 64 bit integer, respecting this ByteReader's | |
161 // endianness. Set *LEN to the length of the initial length in | |
162 // bytes, either four or twelve. As a side effect, set this | |
163 // ByteReader's offset size to either 4 (if we see a 32-bit DWARF | |
164 // initial length) or 8 (if we see a 64-bit DWARF initial length). | |
165 // | |
166 // A DWARF initial length is either: | |
167 // | |
168 // - a byte count stored as an unsigned 32-bit value less than | |
169 // 0xfffffff0, indicating that the data whose length is being | |
170 // measured uses the 32-bit DWARF format, or | |
171 // | |
172 // - The 32-bit value 0xffffffff, followed by a 64-bit byte count, | |
173 // indicating that the data whose length is being measured uses | |
174 // the 64-bit DWARF format. | |
175 uint64 ReadInitialLength(const char* start, size_t* len); | |
176 | |
177 // Read an offset from BUFFER and return it as an unsigned 64 bit | |
178 // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the | |
179 // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes | |
180 // long. You must call ReadInitialLength or SetOffsetSize before calling | |
181 // this function; see the comments above for details. | |
182 uint64 ReadOffset(const char* buffer) const; | |
183 | |
184 // Return the current offset size, in bytes. | |
185 // A return value of 4 indicates that we are reading 32-bit DWARF. | |
186 // A return value of 8 indicates that we are reading 64-bit DWARF. | |
187 uint8 OffsetSize() const { return offset_size_; } | |
188 | |
189 // Indicate that section offsets and lengths are SIZE bytes long. SIZE | |
190 // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF). | |
191 // Usually, you should not call this function yourself; instead, let a | |
192 // call to ReadInitialLength establish the data's offset size | |
193 // automatically. | |
194 void SetOffsetSize(uint8 size); | |
195 | |
196 // The Linux C++ ABI uses a variant of DWARF call frame information | |
197 // for exception handling. This data is included in the program's | |
198 // address space as the ".eh_frame" section, and interpreted at | |
199 // runtime to walk the stack, find exception handlers, and run | |
200 // cleanup code. The format is mostly the same as DWARF CFI, with | |
201 // some adjustments made to provide the additional | |
202 // exception-handling data, and to make the data easier to work with | |
203 // in memory --- for example, to allow it to be placed in read-only | |
204 // memory even when describing position-independent code. | |
205 // | |
206 // In particular, exception handling data can select a number of | |
207 // different encodings for pointers that appear in the data, as | |
208 // described by the DwarfPointerEncoding enum. There are actually | |
209 // four axes(!) to the encoding: | |
210 // | |
211 // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use | |
212 // the DWARF LEB128 encoding. | |
213 // | |
214 // - The pointer's signedness: pointers can be signed or unsigned. | |
215 // | |
216 // - The pointer's base address: the data stored in the exception | |
217 // handling data can be the actual address (that is, an absolute | |
218 // pointer), or relative to one of a number of different base | |
219 // addresses --- including that of the encoded pointer itself, for | |
220 // a form of "pc-relative" addressing. | |
221 // | |
222 // - The pointer may be indirect: it may be the address where the | |
223 // true pointer is stored. (This is used to refer to things via | |
224 // global offset table entries, program linkage table entries, or | |
225 // other tricks used in position-independent code.) | |
226 // | |
227 // There are also two options that fall outside that matrix | |
228 // altogether: the pointer may be omitted, or it may have padding to | |
229 // align it on an appropriate address boundary. (That last option | |
230 // may seem like it should be just another axis, but it is not.) | |
231 | |
232 // Indicate that the exception handling data is loaded starting at | |
233 // SECTION_BASE, and that the start of its buffer in our own memory | |
234 // is BUFFER_BASE. This allows us to find the address that a given | |
235 // byte in our buffer would have when loaded into the program the | |
236 // data describes. We need this to resolve DW_EH_PE_pcrel pointers. | |
237 void SetCFIDataBase(uint64 section_base, const char *buffer_base); | |
238 | |
239 // Indicate that the base address of the program's ".text" section | |
240 // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers. | |
241 void SetTextBase(uint64 text_base); | |
242 | |
243 // Indicate that the base address for DW_EH_PE_datarel pointers is | |
244 // DATA_BASE. The proper value depends on the ABI; it is usually the | |
245 // address of the global offset table, held in a designated register in | |
246 // position-independent code. You will need to look at the startup code | |
247 // for the target system to be sure. I tried; my eyes bled. | |
248 void SetDataBase(uint64 data_base); | |
249 | |
250 // Indicate that the base address for the FDE we are processing is | |
251 // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel | |
252 // pointers. (This encoding does not seem to be used by the GNU | |
253 // toolchain.) | |
254 void SetFunctionBase(uint64 function_base); | |
255 | |
256 // Indicate that we are no longer processing any FDE, so any use of | |
257 // a DW_EH_PE_funcrel encoding is an error. | |
258 void ClearFunctionBase(); | |
259 | |
260 // Return true if ENCODING is a valid pointer encoding. | |
261 bool ValidEncoding(DwarfPointerEncoding encoding) const; | |
262 | |
263 // Return true if we have all the information we need to read a | |
264 // pointer that uses ENCODING. This checks that the appropriate | |
265 // SetFooBase function for ENCODING has been called. | |
266 bool UsableEncoding(DwarfPointerEncoding encoding) const; | |
267 | |
268 // Read an encoded pointer from BUFFER using ENCODING; return the | |
269 // absolute address it represents, and set *LEN to the pointer's | |
270 // length in bytes, including any padding for aligned pointers. | |
271 // | |
272 // This function calls 'abort' if ENCODING is invalid or refers to a | |
273 // base address this reader hasn't been given, so you should check | |
274 // with ValidEncoding and UsableEncoding first if you would rather | |
275 // die in a more helpful way. | |
276 uint64 ReadEncodedPointer(const char *buffer, DwarfPointerEncoding encoding, | |
277 size_t *len) const; | |
278 | |
279 private: | |
280 | |
281 // Function pointer type for our address and offset readers. | |
282 typedef uint64 (ByteReader::*AddressReader)(const char*) const; | |
283 | |
284 // Read an offset from BUFFER and return it as an unsigned 64 bit | |
285 // integer. DWARF2/3 define offsets as either 4 or 8 bytes, | |
286 // generally depending on the amount of DWARF2/3 info present. | |
287 // This function pointer gets set by SetOffsetSize. | |
288 AddressReader offset_reader_; | |
289 | |
290 // Read an address from BUFFER and return it as an unsigned 64 bit | |
291 // integer. DWARF2/3 allow addresses to be any size from 0-255 | |
292 // bytes currently. Internally we support 4 and 8 byte addresses, | |
293 // and will CHECK on anything else. | |
294 // This function pointer gets set by SetAddressSize. | |
295 AddressReader address_reader_; | |
296 | |
297 Endianness endian_; | |
298 uint8 address_size_; | |
299 uint8 offset_size_; | |
300 | |
301 // Base addresses for Linux C++ exception handling data's encoded pointers. | |
302 bool have_section_base_, have_text_base_, have_data_base_; | |
303 bool have_function_base_; | |
304 uint64 section_base_, text_base_, data_base_, function_base_; | |
305 const char *buffer_base_; | |
306 }; | |
307 | |
308 } // namespace dwarf2reader | |
309 | |
310 #include "common/dwarf/bytereader-inl.h" | |
311 | |
312 #endif // COMMON_DWARF_BYTEREADER_H__ | |
OLD | NEW |