OLD | NEW |
1 // Copyright (c) 2008, Google Inc. | 1 // Copyright (c) 2008, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 14 matching lines...) Expand all Loading... |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
30 // --- | 30 // --- |
31 // Author: Ken Ashcraft <opensource@google.com> | 31 // Author: Ken Ashcraft <opensource@google.com> |
32 | 32 |
33 #include <config.h> | 33 #include <config.h> |
34 #include "thread_cache.h" | 34 #include "thread_cache.h" |
35 #include <errno.h> | |
36 #include <string.h> // for memcpy | 35 #include <string.h> // for memcpy |
37 #include <algorithm> // for max, min | 36 #include <algorithm> // for max, min |
38 #include "base/commandlineflags.h" // for SpinLockHolder | 37 #include "base/commandlineflags.h" // for SpinLockHolder |
39 #include "base/spinlock.h" // for SpinLockHolder | 38 #include "base/spinlock.h" // for SpinLockHolder |
40 #include "central_freelist.h" // for CentralFreeListPadded | 39 #include "central_freelist.h" // for CentralFreeListPadded |
41 #include "maybe_threads.h" | 40 #include "maybe_threads.h" |
42 | 41 |
43 using std::min; | 42 using std::min; |
44 using std::max; | 43 using std::max; |
45 | 44 |
46 DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, | 45 DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, |
47 EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", | 46 EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", |
48 kDefaultOverallThreadCacheSize), | 47 kDefaultOverallThreadCacheSize), |
49 "Bound on the total amount of bytes allocated to " | 48 "Bound on the total amount of bytes allocated to " |
50 "thread caches. This bound is not strict, so it is possible " | 49 "thread caches. This bound is not strict, so it is possible " |
51 "for the cache to go over this bound in certain circumstances. " | 50 "for the cache to go over this bound in certain circumstances. "); |
52 "Maximum value of this flag is capped to 1 GB."); | |
53 | 51 |
54 namespace tcmalloc { | 52 namespace tcmalloc { |
55 | 53 |
56 static bool phinited = false; | 54 static bool phinited = false; |
57 | 55 |
58 volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; | 56 volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; |
59 size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; | 57 size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; |
60 ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; | 58 ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; |
61 PageHeapAllocator<ThreadCache> threadcache_allocator; | 59 PageHeapAllocator<ThreadCache> threadcache_allocator; |
62 ThreadCache* ThreadCache::thread_heaps_ = NULL; | 60 ThreadCache* ThreadCache::thread_heaps_ = NULL; |
63 int ThreadCache::thread_heap_count_ = 0; | 61 int ThreadCache::thread_heap_count_ = 0; |
64 ThreadCache* ThreadCache::next_memory_steal_ = NULL; | 62 ThreadCache* ThreadCache::next_memory_steal_ = NULL; |
65 #ifdef HAVE_TLS | 63 #ifdef HAVE_TLS |
66 __thread ThreadCache* ThreadCache::threadlocal_heap_ | 64 __thread ThreadCache* ThreadCache::threadlocal_heap_ |
67 # ifdef HAVE___ATTRIBUTE__ | 65 # ifdef HAVE___ATTRIBUTE__ |
68 __attribute__ ((tls_model ("initial-exec"))) | 66 __attribute__ ((tls_model ("initial-exec"))) |
69 # endif | 67 # endif |
70 ; | 68 ; |
71 #endif | 69 #endif |
72 bool ThreadCache::tsd_inited_ = false; | 70 bool ThreadCache::tsd_inited_ = false; |
73 pthread_key_t ThreadCache::heap_key_; | 71 pthread_key_t ThreadCache::heap_key_; |
74 | 72 |
75 #if defined(HAVE_TLS) | 73 #if defined(HAVE_TLS) |
76 bool kernel_supports_tls = false; // be conservative | 74 bool kernel_supports_tls = false; // be conservative |
77 # if defined(_WIN32) // windows has supported TLS since winnt, I think. | 75 # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS |
78 void CheckIfKernelSupportsTLS() { | |
79 kernel_supports_tls = true; | |
80 } | |
81 # elif !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS | |
82 void CheckIfKernelSupportsTLS() { | 76 void CheckIfKernelSupportsTLS() { |
83 kernel_supports_tls = false; | 77 kernel_supports_tls = false; |
84 } | 78 } |
85 # else | 79 # else |
86 # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too | 80 # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too |
87 void CheckIfKernelSupportsTLS() { | 81 void CheckIfKernelSupportsTLS() { |
88 struct utsname buf; | 82 struct utsname buf; |
89 if (uname(&buf) < 0) { // should be impossible | 83 if (uname(&buf) != 0) { // should be impossible |
90 Log(kLog, __FILE__, __LINE__, | 84 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno); |
91 "uname failed assuming no TLS support (errno)", errno); | |
92 kernel_supports_tls = false; | 85 kernel_supports_tls = false; |
93 } else if (strcasecmp(buf.sysname, "linux") == 0) { | 86 } else if (strcasecmp(buf.sysname, "linux") == 0) { |
94 // The linux case: the first kernel to support TLS was 2.6.0 | 87 // The linux case: the first kernel to support TLS was 2.6.0 |
95 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x | 88 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x |
96 kernel_supports_tls = false; | 89 kernel_supports_tls = false; |
97 else if (buf.release[0] == '2' && buf.release[1] == '.' && | 90 else if (buf.release[0] == '2' && buf.release[1] == '.' && |
98 buf.release[2] >= '0' && buf.release[2] < '6' && | 91 buf.release[2] >= '0' && buf.release[2] < '6' && |
99 buf.release[3] == '.') // 2.0 - 2.5 | 92 buf.release[3] == '.') // 2.0 - 2.5 |
100 kernel_supports_tls = false; | 93 kernel_supports_tls = false; |
101 else | 94 else |
102 kernel_supports_tls = true; | 95 kernel_supports_tls = true; |
103 } else if (strcasecmp(buf.sysname, "CYGWIN_NT-6.1-WOW64") == 0) { | |
104 // In my testing, this version of cygwin, at least, would hang | |
105 // when using TLS. | |
106 kernel_supports_tls = false; | |
107 } else { // some other kernel, we'll be optimistic | 96 } else { // some other kernel, we'll be optimistic |
108 kernel_supports_tls = true; | 97 kernel_supports_tls = true; |
109 } | 98 } |
110 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG | 99 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG |
111 } | 100 } |
112 # endif // HAVE_DECL_UNAME | 101 # endif // HAVE_DECL_UNAME |
113 #endif // HAVE_TLS | 102 #endif // HAVE_TLS |
114 | 103 |
115 void ThreadCache::Init(pthread_t tid) { | 104 void ThreadCache::Init(pthread_t tid) { |
116 size_ = 0; | 105 size_ = 0; |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
263 const int batch_size = Static::sizemap()->num_objects_to_move(cl); | 252 const int batch_size = Static::sizemap()->num_objects_to_move(cl); |
264 if (list->max_length() > batch_size) { | 253 if (list->max_length() > batch_size) { |
265 list->set_max_length( | 254 list->set_max_length( |
266 max<int>(list->max_length() - batch_size, batch_size)); | 255 max<int>(list->max_length() - batch_size, batch_size)); |
267 } | 256 } |
268 } | 257 } |
269 list->clear_lowwatermark(); | 258 list->clear_lowwatermark(); |
270 } | 259 } |
271 | 260 |
272 IncreaseCacheLimit(); | 261 IncreaseCacheLimit(); |
| 262 |
| 263 // int64 finish = CycleClock::Now(); |
| 264 // CycleTimer ct; |
| 265 // MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); |
273 } | 266 } |
274 | 267 |
275 void ThreadCache::IncreaseCacheLimit() { | 268 void ThreadCache::IncreaseCacheLimit() { |
276 SpinLockHolder h(Static::pageheap_lock()); | 269 SpinLockHolder h(Static::pageheap_lock()); |
277 IncreaseCacheLimitLocked(); | 270 IncreaseCacheLimitLocked(); |
278 } | 271 } |
279 | 272 |
280 void ThreadCache::IncreaseCacheLimitLocked() { | 273 void ThreadCache::IncreaseCacheLimitLocked() { |
281 if (unclaimed_cache_space_ > 0) { | 274 if (unclaimed_cache_space_ > 0) { |
282 // Possibly make unclaimed_cache_space_ negative. | 275 // Possibly make unclaimed_cache_space_ negative. |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
322 Static::InitStaticVars(); | 315 Static::InitStaticVars(); |
323 threadcache_allocator.Init(); | 316 threadcache_allocator.Init(); |
324 phinited = 1; | 317 phinited = 1; |
325 } | 318 } |
326 } | 319 } |
327 | 320 |
328 void ThreadCache::InitTSD() { | 321 void ThreadCache::InitTSD() { |
329 ASSERT(!tsd_inited_); | 322 ASSERT(!tsd_inited_); |
330 perftools_pthread_key_create(&heap_key_, DestroyThreadCache); | 323 perftools_pthread_key_create(&heap_key_, DestroyThreadCache); |
331 tsd_inited_ = true; | 324 tsd_inited_ = true; |
332 | |
333 #ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY | |
334 // We may have used a fake pthread_t for the main thread. Fix it. | |
335 pthread_t zero; | |
336 memset(&zero, 0, sizeof(zero)); | |
337 SpinLockHolder h(Static::pageheap_lock()); | |
338 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | |
339 if (h->tid_ == zero) { | |
340 h->tid_ = pthread_self(); | |
341 } | |
342 } | |
343 #endif | |
344 } | 325 } |
345 | 326 |
346 ThreadCache* ThreadCache::CreateCacheIfNecessary() { | 327 ThreadCache* ThreadCache::CreateCacheIfNecessary() { |
347 // Initialize per-thread data if necessary | 328 // Initialize per-thread data if necessary |
348 ThreadCache* heap = NULL; | 329 ThreadCache* heap = NULL; |
349 { | 330 { |
350 SpinLockHolder h(Static::pageheap_lock()); | 331 SpinLockHolder h(Static::pageheap_lock()); |
351 // On some old glibc's, and on freebsd's libc (as of freebsd 8.1), | 332 // On very old libc's, this call may crash if it happens too |
352 // calling pthread routines (even pthread_self) too early could | 333 // early. No libc using NPTL should be affected. If there |
353 // cause a segfault. Since we can call pthreads quite early, we | 334 // is a crash here, we could use code (on linux, at least) |
354 // have to protect against that in such situations by making a | 335 // to detect NPTL vs LinuxThreads: |
355 // 'fake' pthread. This is not ideal since it doesn't work well | 336 // http://www.redhat.com/archives/phil-list/2003-April/msg00038.html |
356 // when linking tcmalloc statically with apps that create threads | 337 // If we detect not-NPTL, we could execute the old code from |
357 // before main, so we only do it if we have to. | 338 // http://google-perftools.googlecode.com/svn/tags/google-perftools-1.7/src/thread_cache.cc |
358 #ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY | 339 // that avoids calling pthread_self too early. The problem with |
359 pthread_t me; | 340 // that code is it caused a race condition when tcmalloc is linked |
360 if (!tsd_inited_) { | 341 // in statically and other libraries spawn threads before main. |
361 memset(&me, 0, sizeof(me)); | |
362 } else { | |
363 me = pthread_self(); | |
364 } | |
365 #else | |
366 const pthread_t me = pthread_self(); | 342 const pthread_t me = pthread_self(); |
367 #endif | |
368 | 343 |
369 // This may be a recursive malloc call from pthread_setspecific() | 344 // This may be a recursive malloc call from pthread_setspecific() |
370 // In that case, the heap for this thread has already been created | 345 // In that case, the heap for this thread has already been created |
371 // and added to the linked list. So we search for that first. | 346 // and added to the linked list. So we search for that first. |
372 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | 347 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
373 if (h->tid_ == me) { | 348 if (h->tid_ == me) { |
374 heap = h; | 349 heap = h; |
375 break; | 350 break; |
376 } | 351 } |
377 } | 352 } |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
480 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | 455 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
481 // Increasing the total cache size should not circumvent the | 456 // Increasing the total cache size should not circumvent the |
482 // slow-start growth of max_size_. | 457 // slow-start growth of max_size_. |
483 if (ratio < 1.0) { | 458 if (ratio < 1.0) { |
484 h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); | 459 h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); |
485 } | 460 } |
486 claimed += h->max_size_; | 461 claimed += h->max_size_; |
487 } | 462 } |
488 unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; | 463 unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; |
489 per_thread_cache_size_ = space; | 464 per_thread_cache_size_ = space; |
| 465 // TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n, int(space)); |
| 466 } |
| 467 |
| 468 void ThreadCache::Print(TCMalloc_Printer* out) const { |
| 469 for (int cl = 0; cl < kNumClasses; ++cl) { |
| 470 out->printf(" %5" PRIuS " : %4" PRIuS " len; %4d lo; %4"PRIuS |
| 471 " max; %4"PRIuS" overages;\n", |
| 472 Static::sizemap()->ByteSizeForClass(cl), |
| 473 list_[cl].length(), |
| 474 list_[cl].lowwatermark(), |
| 475 list_[cl].max_length(), |
| 476 list_[cl].length_overages()); |
| 477 } |
| 478 } |
| 479 |
| 480 void ThreadCache::PrintThreads(TCMalloc_Printer* out) { |
| 481 size_t actual_limit = 0; |
| 482 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
| 483 h->Print(out); |
| 484 actual_limit += h->max_size_; |
| 485 } |
| 486 out->printf("ThreadCache overall: %"PRIuS ", unclaimed: %"PRIuS |
| 487 ", actual: %"PRIuS"\n", |
| 488 overall_thread_cache_size_, unclaimed_cache_space_, actual_limit); |
490 } | 489 } |
491 | 490 |
492 void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { | 491 void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { |
493 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { | 492 for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { |
494 *total_bytes += h->Size(); | 493 *total_bytes += h->Size(); |
495 if (class_count) { | 494 if (class_count) { |
496 for (int cl = 0; cl < kNumClasses; ++cl) { | 495 for (int cl = 0; cl < kNumClasses; ++cl) { |
497 class_count[cl] += h->freelist_length(cl); | 496 class_count[cl] += h->freelist_length(cl); |
498 } | 497 } |
499 } | 498 } |
500 } | 499 } |
501 } | 500 } |
502 | 501 |
503 void ThreadCache::set_overall_thread_cache_size(size_t new_size) { | 502 void ThreadCache::set_overall_thread_cache_size(size_t new_size) { |
504 // Clip the value to a reasonable range | 503 // Clip the value to a reasonable range |
505 if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; | 504 if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; |
506 if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB | 505 if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB |
507 overall_thread_cache_size_ = new_size; | 506 overall_thread_cache_size_ = new_size; |
508 | 507 |
509 RecomputePerThreadCacheSize(); | 508 RecomputePerThreadCacheSize(); |
510 } | 509 } |
511 | 510 |
512 } // namespace tcmalloc | 511 } // namespace tcmalloc |
OLD | NEW |