// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Paul Pluzhnikov
//
// Allow dynamic symbol lookup in an in-memory Elf image.
//
35 | |
36 #include "base/elf_mem_image.h" | |
37 | |
38 #ifdef HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h | |
39 | |
40 #include <stddef.h> // for size_t, ptrdiff_t | |
41 #include "base/logging.h" | |
42 | |
// From binutils/include/elf/common.h (this doesn't appear to be documented
// anywhere else).
//
//   /* This flag appears in a Versym structure.  It means that the symbol
//      is hidden, and is only visible with an explicit version number.
//      This is a GNU extension.  */
//   #define VERSYM_HIDDEN 0x8000
//
//   /* This is the mask for the rest of the Versym information.  */
//   #define VERSYM_VERSION 0x7fff
53 | |
54 #define VERSYM_VERSION 0x7fff | |
55 | |
56 namespace base { | |
57 | |
58 namespace { | |
59 template <int N> class ElfClass { | |
60 public: | |
61 static const int kElfClass = -1; | |
62 static int ElfBind(const ElfW(Sym) *) { | |
63 CHECK(false); // << "Unexpected word size"; | |
64 return 0; | |
65 } | |
66 static int ElfType(const ElfW(Sym) *) { | |
67 CHECK(false); // << "Unexpected word size"; | |
68 return 0; | |
69 } | |
70 }; | |
71 | |
72 template <> class ElfClass<32> { | |
73 public: | |
74 static const int kElfClass = ELFCLASS32; | |
75 static int ElfBind(const ElfW(Sym) *symbol) { | |
76 return ELF32_ST_BIND(symbol->st_info); | |
77 } | |
78 static int ElfType(const ElfW(Sym) *symbol) { | |
79 return ELF32_ST_TYPE(symbol->st_info); | |
80 } | |
81 }; | |
82 | |
83 template <> class ElfClass<64> { | |
84 public: | |
85 static const int kElfClass = ELFCLASS64; | |
86 static int ElfBind(const ElfW(Sym) *symbol) { | |
87 return ELF64_ST_BIND(symbol->st_info); | |
88 } | |
89 static int ElfType(const ElfW(Sym) *symbol) { | |
90 return ELF64_ST_TYPE(symbol->st_info); | |
91 } | |
92 }; | |
93 | |
94 typedef ElfClass<__WORDSIZE> CurrentElfClass; | |
95 | |
// Returns the address of entry `index` in a table that begins `table_offset`
// bytes past `ehdr` and whose entries are `element_size` bytes apart,
// reinterpreted as `const T*`.
// This is plain pointer arithmetic plus a cast: nothing is dereferenced and
// nothing is range-checked, so callers are responsible for bounds checking.
template <class T>
const T* GetTableElement(const ElfW(Ehdr) *ehdr,
                         ElfW(Off) table_offset,
                         ElfW(Word) element_size,
                         size_t index) {
  const char *const image_start = reinterpret_cast<const char *>(ehdr);
  const char *const element =
      image_start + table_offset + index * element_size;
  return reinterpret_cast<const T*>(element);
}
108 } // namespace | |
109 | |
110 const void *const ElfMemImage::kInvalidBase = | |
111 reinterpret_cast<const void *>(~0L); | |
112 | |
113 ElfMemImage::ElfMemImage(const void *base) { | |
114 CHECK(base != kInvalidBase); | |
115 Init(base); | |
116 } | |
117 | |
118 int ElfMemImage::GetNumSymbols() const { | |
119 if (!hash_) { | |
120 return 0; | |
121 } | |
122 // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash | |
123 return hash_[1]; | |
124 } | |
125 | |
126 const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { | |
127 CHECK_LT(index, GetNumSymbols()); | |
128 return dynsym_ + index; | |
129 } | |
130 | |
131 const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { | |
132 CHECK_LT(index, GetNumSymbols()); | |
133 return versym_ + index; | |
134 } | |
135 | |
136 const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { | |
137 CHECK_LT(index, ehdr_->e_phnum); | |
138 return GetTableElement<ElfW(Phdr)>(ehdr_, | |
139 ehdr_->e_phoff, | |
140 ehdr_->e_phentsize, | |
141 index); | |
142 } | |
143 | |
144 const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { | |
145 CHECK_LT(offset, strsize_); | |
146 return dynstr_ + offset; | |
147 } | |
148 | |
149 const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { | |
150 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { | |
151 // Symbol corresponds to "special" (e.g. SHN_ABS) section. | |
152 return reinterpret_cast<const void *>(sym->st_value); | |
153 } | |
154 CHECK_LT(link_base_, sym->st_value); | |
155 return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_; | |
156 } | |
157 | |
158 const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { | |
159 CHECK_LE(index, verdefnum_); | |
160 const ElfW(Verdef) *version_definition = verdef_; | |
161 while (version_definition->vd_ndx < index && version_definition->vd_next) { | |
162 const char *const version_definition_as_char = | |
163 reinterpret_cast<const char *>(version_definition); | |
164 version_definition = | |
165 reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + | |
166 version_definition->vd_next); | |
167 } | |
168 return version_definition->vd_ndx == index ? version_definition : NULL; | |
169 } | |
170 | |
171 const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( | |
172 const ElfW(Verdef) *verdef) const { | |
173 return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); | |
174 } | |
175 | |
176 const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { | |
177 CHECK_LT(offset, strsize_); | |
178 return dynstr_ + offset; | |
179 } | |
180 | |
181 void ElfMemImage::Init(const void *base) { | |
182 ehdr_ = NULL; | |
183 dynsym_ = NULL; | |
184 dynstr_ = NULL; | |
185 versym_ = NULL; | |
186 verdef_ = NULL; | |
187 hash_ = NULL; | |
188 strsize_ = 0; | |
189 verdefnum_ = 0; | |
190 link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. | |
191 if (!base) { | |
192 return; | |
193 } | |
194 const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base); | |
195 // Fake VDSO has low bit set. | |
196 const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); | |
197 base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1); | |
198 const char *const base_as_char = reinterpret_cast<const char *>(base); | |
199 if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || | |
200 base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { | |
201 RAW_DCHECK(false, "no ELF magic"); // at %p", base); | |
202 return; | |
203 } | |
204 int elf_class = base_as_char[EI_CLASS]; | |
205 if (elf_class != CurrentElfClass::kElfClass) { | |
206 DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); | |
207 return; | |
208 } | |
209 switch (base_as_char[EI_DATA]) { | |
210 case ELFDATA2LSB: { | |
211 if (__LITTLE_ENDIAN != __BYTE_ORDER) { | |
212 DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; | |
213 return; | |
214 } | |
215 break; | |
216 } | |
217 case ELFDATA2MSB: { | |
218 if (__BIG_ENDIAN != __BYTE_ORDER) { | |
219 DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; | |
220 return; | |
221 } | |
222 break; | |
223 } | |
224 default: { | |
225 RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]
; | |
226 return; | |
227 } | |
228 } | |
229 | |
230 ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); | |
231 const ElfW(Phdr) *dynamic_program_header = NULL; | |
232 for (int i = 0; i < ehdr_->e_phnum; ++i) { | |
233 const ElfW(Phdr) *const program_header = GetPhdr(i); | |
234 switch (program_header->p_type) { | |
235 case PT_LOAD: | |
236 if (link_base_ == ~0L) { | |
237 link_base_ = program_header->p_vaddr; | |
238 } | |
239 break; | |
240 case PT_DYNAMIC: | |
241 dynamic_program_header = program_header; | |
242 break; | |
243 } | |
244 } | |
245 if (link_base_ == ~0L || !dynamic_program_header) { | |
246 RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); | |
247 RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); | |
248 // Mark this image as not present. Can not recur infinitely. | |
249 Init(0); | |
250 return; | |
251 } | |
252 ptrdiff_t relocation = | |
253 base_as_char - reinterpret_cast<const char *>(link_base_); | |
254 ElfW(Dyn) *dynamic_entry = | |
255 reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + | |
256 relocation); | |
257 for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { | |
258 ElfW(Xword) value = dynamic_entry->d_un.d_val; | |
259 if (fake_vdso) { | |
260 // A complication: in the real VDSO, dynamic entries are not relocated | |
261 // (it wasn't loaded by a dynamic loader). But when testing with a | |
262 // "fake" dlopen()ed vdso library, the loader relocates some (but | |
263 // not all!) of them before we get here. | |
264 if (dynamic_entry->d_tag == DT_VERDEF) { | |
265 // The only dynamic entry (of the ones we care about) libc-2.3.6 | |
266 // loader doesn't relocate. | |
267 value += relocation; | |
268 } | |
269 } else { | |
270 // Real VDSO. Everything needs to be relocated. | |
271 value += relocation; | |
272 } | |
273 switch (dynamic_entry->d_tag) { | |
274 case DT_HASH: | |
275 hash_ = reinterpret_cast<ElfW(Word) *>(value); | |
276 break; | |
277 case DT_SYMTAB: | |
278 dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); | |
279 break; | |
280 case DT_STRTAB: | |
281 dynstr_ = reinterpret_cast<const char *>(value); | |
282 break; | |
283 case DT_VERSYM: | |
284 versym_ = reinterpret_cast<ElfW(Versym) *>(value); | |
285 break; | |
286 case DT_VERDEF: | |
287 verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); | |
288 break; | |
289 case DT_VERDEFNUM: | |
290 verdefnum_ = dynamic_entry->d_un.d_val; | |
291 break; | |
292 case DT_STRSZ: | |
293 strsize_ = dynamic_entry->d_un.d_val; | |
294 break; | |
295 default: | |
296 // Unrecognized entries explicitly ignored. | |
297 break; | |
298 } | |
299 } | |
300 if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || | |
301 !verdef_ || !verdefnum_ || !strsize_) { | |
302 RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); | |
303 RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); | |
304 RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); | |
305 RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); | |
306 RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); | |
307 RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); | |
308 RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); | |
309 // Mark this image as not present. Can not recur infinitely. | |
310 Init(0); | |
311 return; | |
312 } | |
313 } | |
314 | |
315 bool ElfMemImage::LookupSymbol(const char *name, | |
316 const char *version, | |
317 int type, | |
318 SymbolInfo *info) const { | |
319 for (SymbolIterator it = begin(); it != end(); ++it) { | |
320 if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && | |
321 CurrentElfClass::ElfType(it->symbol) == type) { | |
322 if (info) { | |
323 *info = *it; | |
324 } | |
325 return true; | |
326 } | |
327 } | |
328 return false; | |
329 } | |
330 | |
331 bool ElfMemImage::LookupSymbolByAddress(const void *address, | |
332 SymbolInfo *info_out) const { | |
333 for (SymbolIterator it = begin(); it != end(); ++it) { | |
334 const char *const symbol_start = | |
335 reinterpret_cast<const char *>(it->address); | |
336 const char *const symbol_end = symbol_start + it->symbol->st_size; | |
337 if (symbol_start <= address && address < symbol_end) { | |
338 if (info_out) { | |
339 // Client wants to know details for that symbol (the usual case). | |
340 if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { | |
341 // Strong symbol; just return it. | |
342 *info_out = *it; | |
343 return true; | |
344 } else { | |
345 // Weak or local. Record it, but keep looking for a strong one. | |
346 *info_out = *it; | |
347 } | |
348 } else { | |
349 // Client only cares if there is an overlapping symbol. | |
350 return true; | |
351 } | |
352 } | |
353 } | |
354 return false; | |
355 } | |
356 | |
357 ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) | |
358 : index_(index), image_(image) { | |
359 } | |
360 | |
361 const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { | |
362 return &info_; | |
363 } | |
364 | |
365 const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { | |
366 return info_; | |
367 } | |
368 | |
369 bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { | |
370 return this->image_ == rhs.image_ && this->index_ == rhs.index_; | |
371 } | |
372 | |
373 bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { | |
374 return !(*this == rhs); | |
375 } | |
376 | |
377 ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { | |
378 this->Update(1); | |
379 return *this; | |
380 } | |
381 | |
382 ElfMemImage::SymbolIterator ElfMemImage::begin() const { | |
383 SymbolIterator it(this, 0); | |
384 it.Update(0); | |
385 return it; | |
386 } | |
387 | |
388 ElfMemImage::SymbolIterator ElfMemImage::end() const { | |
389 return SymbolIterator(this, GetNumSymbols()); | |
390 } | |
391 | |
392 void ElfMemImage::SymbolIterator::Update(int increment) { | |
393 const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); | |
394 CHECK(image->IsPresent() || increment == 0); | |
395 if (!image->IsPresent()) { | |
396 return; | |
397 } | |
398 index_ += increment; | |
399 if (index_ >= image->GetNumSymbols()) { | |
400 index_ = image->GetNumSymbols(); | |
401 return; | |
402 } | |
403 const ElfW(Sym) *symbol = image->GetDynsym(index_); | |
404 const ElfW(Versym) *version_symbol = image->GetVersym(index_); | |
405 CHECK(symbol && version_symbol); | |
406 const char *const symbol_name = image->GetDynstr(symbol->st_name); | |
407 const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; | |
408 const ElfW(Verdef) *version_definition = NULL; | |
409 const char *version_name = ""; | |
410 if (symbol->st_shndx == SHN_UNDEF) { | |
411 // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and | |
412 // version_index could well be greater than verdefnum_, so calling | |
413 // GetVerdef(version_index) may trigger assertion. | |
414 } else { | |
415 version_definition = image->GetVerdef(version_index); | |
416 } | |
417 if (version_definition) { | |
418 // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, | |
419 // optional 2nd if the version has a parent. | |
420 CHECK_LE(1, version_definition->vd_cnt); | |
421 CHECK_LE(version_definition->vd_cnt, 2); | |
422 const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); | |
423 version_name = image->GetVerstr(version_aux->vda_name); | |
424 } | |
425 info_.name = symbol_name; | |
426 info_.version = version_name; | |
427 info_.address = image->GetSymAddr(symbol); | |
428 info_.symbol = symbol; | |
429 } | |
430 | |
431 } // namespace base | |
432 | |
433 #endif // HAVE_ELF_MEM_IMAGE | |
OLD | NEW |