Bullet Collision Detection & Physics Library
btThreads.cpp
/*
Copyright (c) 2003-2014 Erwin Coumans  http://bullet.googlecode.com

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#include "btThreads.h"
#include "btQuickprof.h"
#include <algorithm>  // for std::min and std::max

#if BT_USE_OPENMP && BT_THREADSAFE

#include <omp.h>

#endif  // #if BT_USE_OPENMP && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE

// use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#include <ppl.h>  // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h>  // for GetProcessorCount()

#endif  // #if BT_USE_PPL && BT_THREADSAFE

#if BT_USE_TBB && BT_THREADSAFE

// use Intel Threading Building Blocks for thread management
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>

#endif  // #if BT_USE_TBB && BT_THREADSAFE

#if BT_THREADSAFE
//
// Lightweight spin-mutex based on atomics.
// Using ordinary system-provided mutexes such as Windows critical sections was noticeably slower,
// presumably because a failed lock attempt puts the thread to sleep and triggers costly
// context switching.
//
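// Usage sketch (illustrative only, not part of the original file): a btSpinMutex
// guarding a shared counter. The names sExampleMutex and sSharedTotal are hypothetical.
//
//     static btSpinMutex sExampleMutex;
//     static int sSharedTotal = 0;
//
//     void addToTotal( int x )
//     {
//         sExampleMutex.lock();    // spins until acquired; never sleeps the thread
//         sSharedTotal += x;       // keep the critical section short -- spin-locks suit brief work
//         sExampleMutex.unlock();
//     }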

#if __cplusplus >= 201103L

// for anything claiming full C++11 compliance, use C++11 atomics
// on GCC or Clang you need to compile with -std=c++11
#define USE_CPP11_ATOMICS 1

#elif defined( _MSC_VER )

// on MSVC, use intrinsics instead
#define USE_MSVC_INTRINSICS 1

#elif defined( __GNUC__ ) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))

// available since GCC 4.7 and some versions of clang
// todo: check for clang
#define USE_GCC_BUILTIN_ATOMICS 1

#elif defined( __GNUC__ ) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

// available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1

#endif

#if USE_CPP11_ATOMICS

#include <atomic>
#include <thread>

#define THREAD_LOCAL_STATIC thread_local static

bool btSpinMutex::tryLock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    int expected = 0;
    return std::atomic_compare_exchange_weak_explicit( aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread.
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    std::atomic_store_explicit( aDest, int(0), std::memory_order_release );
}

#elif USE_MSVC_INTRINSICS

#define WIN32_LEAN_AND_MEAN

#include <windows.h>
#include <intrin.h>

#define THREAD_LOCAL_STATIC __declspec( thread ) static

bool btSpinMutex::tryLock()
{
    volatile long* aDest = reinterpret_cast<long*>(&mLock);
    return ( 0 == _InterlockedCompareExchange( aDest, 1, 0 ) );
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    volatile long* aDest = reinterpret_cast<long*>( &mLock );
    _InterlockedExchange( aDest, 0 );
}

#elif USE_GCC_BUILTIN_ATOMICS

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    int expected = 0;
    bool weak = false;
    const int memOrderSuccess = __ATOMIC_ACQ_REL;
    const int memOrderFail = __ATOMIC_ACQUIRE;
    return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}

#elif USE_GCC_BUILTIN_ATOMICS_OLD

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}

void btSpinMutex::lock()
{
    // note: this lock does not sleep the thread
    while (! tryLock())
    {
        // spin
    }
}

void btSpinMutex::unlock()
{
    // write 0
    __sync_fetch_and_and(&mLock, int(0));
}

#else  //#elif USE_MSVC_INTRINSICS

#error "no threading primitives defined -- unknown platform"

#endif  //#else //#elif USE_MSVC_INTRINSICS

#else  //#if BT_THREADSAFE

// these should never be called
void btSpinMutex::lock()
{
    btAssert( !"unimplemented btSpinMutex::lock() called" );
}

void btSpinMutex::unlock()
{
    btAssert( !"unimplemented btSpinMutex::unlock() called" );
}

bool btSpinMutex::tryLock()
{
    btAssert( !"unimplemented btSpinMutex::tryLock() called" );
    return true;
}

#define THREAD_LOCAL_STATIC static

#endif  // #else //#if BT_THREADSAFE

struct ThreadsafeCounter
{
    unsigned int mCounter;
    btSpinMutex mMutex;

    ThreadsafeCounter()
    {
        mCounter = 0;
        --mCounter;  // first count should come back 0
    }

    unsigned int getNext()
    {
        // no need to optimize this with atomics, it is only called ONCE per thread!
        mMutex.lock();
        mCounter++;
        if ( mCounter >= BT_MAX_THREAD_COUNT )
        {
            btAssert( !"thread counter exceeded" );
            // wrap back to the first worker index
            mCounter = 1;
        }
        unsigned int val = mCounter;
        mMutex.unlock();
        return val;
    }
};
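
// Illustrative call sequence (a sketch, not part of the original file): the
// constructor leaves mCounter at ~0U, so the first getNext() wraps to 0, which
// is reserved for the main thread; worker threads then get 1, 2, 3, ...
//
//     ThreadsafeCounter counter;
//     counter.getNext();  // returns 0 (main thread)
//     counter.getNext();  // returns 1 (first worker)
//     counter.getNext();  // returns 2 (second worker)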

static btITaskScheduler* gBtTaskScheduler;
static int gThreadsRunningCounter = 0;  // useful for detecting if we are trying to do nested parallel-for calls
static btSpinMutex gThreadsRunningCounterMutex;
static ThreadsafeCounter gThreadCounter;

//
// BT_DETECT_BAD_THREAD_INDEX tries to detect when multiple threads are assigned the same thread index.
//
// BT_DETECT_BAD_THREAD_INDEX is a developer option to test whether
// certain assumptions about how the task scheduler manages its threads
// hold true.
// The main assumption is:
//   - when the threadpool is resized, the task scheduler either
//       1. destroys all worker threads and creates all new ones in the correct number, OR
//       2. never destroys a worker thread
//
// We make that assumption because we can't easily enumerate the worker threads of a task scheduler
// to assign nice sequential thread-indexes. We also do not get notified if a worker thread is destroyed,
// so we can't tell when a thread-index is no longer being used.
// We allocate thread-indexes as needed with a sequential global thread counter.
//
// Our simple thread-counting scheme falls apart if the task scheduler destroys some threads but
// continues to re-use other threads and the application repeatedly resizes the thread pool of the
// task scheduler.
// In order to prevent the thread counter from exceeding the global max (BT_MAX_THREAD_COUNT), we
// wrap the thread counter back to 1. This should only happen if the worker threads have all been
// destroyed and re-created.
//
// BT_DETECT_BAD_THREAD_INDEX only works for Win32 right now,
// but could be adapted to work with pthreads
#define BT_DETECT_BAD_THREAD_INDEX 0

#if BT_DETECT_BAD_THREAD_INDEX

typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
ThreadId_t gDebugThreadIds[ BT_MAX_THREAD_COUNT ];

static ThreadId_t getDebugThreadId()
{
    return GetCurrentThreadId();
}

#endif // #if BT_DETECT_BAD_THREAD_INDEX

// return a unique index per thread, main thread is 0, worker threads are in [1, BT_MAX_THREAD_COUNT)
unsigned int btGetCurrentThreadIndex()
{
    const unsigned int kNullIndex = ~0U;
    THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
    if ( sThreadIndex == kNullIndex )
    {
        sThreadIndex = gThreadCounter.getNext();
        btAssert( sThreadIndex < BT_MAX_THREAD_COUNT );
    }
#if BT_DETECT_BAD_THREAD_INDEX
    if ( gBtTaskScheduler && sThreadIndex > 0 )
    {
        ThreadId_t tid = getDebugThreadId();
        // if not set
        if ( gDebugThreadIds[ sThreadIndex ] == kInvalidThreadId )
        {
            // set it
            gDebugThreadIds[ sThreadIndex ] = tid;
        }
        else
        {
            if ( gDebugThreadIds[ sThreadIndex ] != tid )
            {
                // this could indicate the task scheduler is breaking our assumptions about
                // how threads are managed when the threadpool is resized
                btAssert( !"there are 2 or more threads with the same thread-index!" );
                __debugbreak();
            }
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX
    return sThreadIndex;
}
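
// Usage sketch (illustrative only, not part of the original file): per-thread
// scratch storage indexed by btGetCurrentThreadIndex(), a common pattern for
// accumulating results without locks. sPerThreadSum and accumulate are hypothetical.
//
//     static btScalar sPerThreadSum[ BT_MAX_THREAD_COUNT ];
//
//     void accumulate( btScalar x )
//     {
//         // no lock needed: the index is unique among concurrently running threads
//         sPerThreadSum[ btGetCurrentThreadIndex() ] += x;
//     }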

bool btIsMainThread()
{
    return btGetCurrentThreadIndex() == 0;
}

void btResetThreadIndexCounter()
{
    // for when all current worker threads are destroyed
    btAssert( !btThreadsAreRunning() );
    gThreadCounter.mCounter = 0;
}

btITaskScheduler::btITaskScheduler( const char* name )
{
    m_name = name;
    m_savedThreadCounter = 0;
    m_isActive = false;
}

void btITaskScheduler::activate()
{
    // gThreadCounter is used to assign a thread-index to each worker thread in a task scheduler.
    // The main thread is always thread-index 0, and worker threads are numbered from 1 to 63 (BT_MAX_THREAD_COUNT-1).
    // The thread-indexes need to be unique amongst the threads that can be running simultaneously.
    // Since only one task scheduler can be used at a time, it is OK for a pair of threads that belong to different
    // task schedulers to share the same thread-index because they can't be running at the same time.
    // So each task scheduler needs to keep its own thread counter value.
    if ( !m_isActive )
    {
        gThreadCounter.mCounter = m_savedThreadCounter;  // restore saved thread counter
        m_isActive = true;
    }
}

void btITaskScheduler::deactivate()
{
    if ( m_isActive )
    {
        m_savedThreadCounter = gThreadCounter.mCounter;  // save thread counter
        m_isActive = false;
    }
}

void btPushThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter++;
    gThreadsRunningCounterMutex.unlock();
}

void btPopThreadsAreRunning()
{
    gThreadsRunningCounterMutex.lock();
    gThreadsRunningCounter--;
    gThreadsRunningCounterMutex.unlock();
}

bool btThreadsAreRunning()
{
    return gThreadsRunningCounter != 0;
}

void btSetTaskScheduler( btITaskScheduler* ts )
{
    int threadId = btGetCurrentThreadIndex();  // make sure we call this on the main thread at least once before any workers run
    if ( threadId != 0 )
    {
        btAssert( !"btSetTaskScheduler must be called from the main thread!" );
        return;
    }
    if ( gBtTaskScheduler )
    {
        // deactivate old task scheduler
        gBtTaskScheduler->deactivate();
    }
    gBtTaskScheduler = ts;
    if ( ts )
    {
        // activate new task scheduler
        ts->activate();
    }
}

btITaskScheduler* btGetTaskScheduler()
{
    return gBtTaskScheduler;
}

void btParallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body )
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if ( !btThreadsAreRunning() )
    {
        // clear out thread ids
        for ( int i = 0; i < BT_MAX_THREAD_COUNT; ++i )
        {
            gDebugThreadIds[ i ] = kInvalidThreadId;
        }
    }
#endif // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert( gBtTaskScheduler != NULL );  // call btSetTaskScheduler() with a valid task scheduler first!
    gBtTaskScheduler->parallelFor( iBegin, iEnd, grainSize, body );

#else // #if BT_THREADSAFE

    // non-parallel version of btParallelFor
    btAssert( !"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE" );
    body.forLoop( iBegin, iEnd );

#endif // #if BT_THREADSAFE
}
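
// Usage sketch (illustrative only, not part of the original file): squaring the
// elements of a hypothetical array in parallel. SquareBody, gValues, and
// numValues are made-up names; only btIParallelForBody and btParallelFor are real.
//
//     struct SquareBody : public btIParallelForBody
//     {
//         btScalar* mData;
//         virtual void forLoop( int iBegin, int iEnd ) const BT_OVERRIDE
//         {
//             // each job squares one contiguous sub-range [iBegin, iEnd)
//             for ( int i = iBegin; i < iEnd; ++i )
//             {
//                 mData[ i ] *= mData[ i ];
//             }
//         }
//     };
//
//     SquareBody body;
//     body.mData = gValues;
//     btParallelFor( 0, numValues, 64, body );  // jobs of roughly 64 elements each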

///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
/// (really just useful for testing performance of single threaded vs multi)
///
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
    btTaskSchedulerSequential() : btITaskScheduler( "Sequential" ) {}
    virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
    virtual int getNumThreads() const BT_OVERRIDE { return 1; }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE {}
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_sequential" );
        body.forLoop( iBegin, iEnd );
    }
};

#if BT_USE_OPENMP && BT_THREADSAFE
///
/// btTaskSchedulerOpenMP -- wrapper around the OpenMP task scheduler
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerOpenMP() : btITaskScheduler( "OpenMP" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return omp_get_max_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // Because OpenMP is a standard with various implementations, we can't know
        // for sure that every implementation destroys all of its previous threads
        // when the threadpool is resized.
        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
        omp_set_num_threads( 1 );  // hopefully, all previous threads get destroyed here
        omp_set_num_threads( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_OpenMP" );
        btPushThreadsAreRunning();
#pragma omp parallel for schedule( static, 1 )
        for ( int i = iBegin; i < iEnd; i += grainSize )
        {
            BT_PROFILE( "OpenMP_job" );
            body.forLoop( i, ( std::min )( i + grainSize, iEnd ) );
        }
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_OPENMP && BT_THREADSAFE
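
// Worked example of the grain-size chunking used above (illustrative): with
// iBegin = 0, iEnd = 100, grainSize = 30, the loop dispatches four jobs covering
// [0,30), [30,60), [60,90), and [90,100) -- the std::min clamps the final job.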

#if BT_USE_TBB && BT_THREADSAFE
///
/// btTaskSchedulerTBB -- wrapper around the Intel Threading Building Blocks task scheduler
///
class btTaskSchedulerTBB : public btITaskScheduler
{
    int m_numThreads;
    tbb::task_scheduler_init* m_tbbSchedulerInit;

public:
    btTaskSchedulerTBB() : btITaskScheduler( "IntelTBB" )
    {
        m_numThreads = 0;
        m_tbbSchedulerInit = NULL;
    }
    ~btTaskSchedulerTBB()
    {
        if ( m_tbbSchedulerInit )
        {
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
    }

    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return tbb::task_scheduler_init::default_num_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        m_numThreads = ( std::max )( 1, ( std::min )( int( BT_MAX_THREAD_COUNT ), numThreads ) );
        if ( m_tbbSchedulerInit )
        {
            // destroys all previous threads
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
        m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;

        void operator()( const tbb::blocked_range<int>& range ) const
        {
            BT_PROFILE( "TBB_job" );
            mBody->forLoop( range.begin(), range.end() );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_TBB" );
        // TBB dispatch
        BodyAdapter tbbBody;
        tbbBody.mBody = &body;
        btPushThreadsAreRunning();
        tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
                           tbbBody,
                           tbb::simple_partitioner()
        );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_TBB && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE
///
/// btTaskSchedulerPPL -- wrapper around the Microsoft Parallel Patterns Library task scheduler
///
class btTaskSchedulerPPL : public btITaskScheduler
{
    int m_numThreads;
public:
    btTaskSchedulerPPL() : btITaskScheduler( "PPL" )
    {
        m_numThreads = 0;
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return concurrency::GetProcessorCount();
    }
    virtual int getNumThreads() const BT_OVERRIDE
    {
        return m_numThreads;
    }
    virtual void setNumThreads( int numThreads ) BT_OVERRIDE
    {
        // capping the thread count for PPL due to a thread-index issue
        const int maxThreadCount = ( std::min )( int( BT_MAX_THREAD_COUNT ), 31 );
        m_numThreads = ( std::max )( 1, ( std::min )( maxThreadCount, numThreads ) );
        using namespace concurrency;
        if ( CurrentScheduler::Id() != -1 )
        {
            CurrentScheduler::Detach();
        }
        SchedulerPolicy policy;
        {
            // PPL seems to destroy threads when the threadpool is shrunk, but keeps reusing old threads;
            // force it to destroy old threads
            policy.SetConcurrencyLimits( 1, 1 );
            CurrentScheduler::Create( policy );
            CurrentScheduler::Detach();
        }
        policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
        CurrentScheduler::Create( policy );
        m_savedThreadCounter = 0;
        if ( m_isActive )
        {
            btResetThreadIndexCounter();
        }
    }
    struct BodyAdapter
    {
        const btIParallelForBody* mBody;
        int mGrainSize;
        int mIndexEnd;

        void operator()( int i ) const
        {
            BT_PROFILE( "PPL_job" );
            mBody->forLoop( i, ( std::min )( i + mGrainSize, mIndexEnd ) );
        }
    };
    virtual void parallelFor( int iBegin, int iEnd, int grainSize, const btIParallelForBody& body ) BT_OVERRIDE
    {
        BT_PROFILE( "parallelFor_PPL" );
        // PPL dispatch
        BodyAdapter pplBody;
        pplBody.mBody = &body;
        pplBody.mGrainSize = grainSize;
        pplBody.mIndexEnd = iEnd;
        btPushThreadsAreRunning();
        // note: MSVC 2010 doesn't support partitioner args, so avoid them
        concurrency::parallel_for( iBegin,
                                   iEnd,
                                   grainSize,
                                   pplBody
        );
        btPopThreadsAreRunning();
    }
};
#endif // #if BT_USE_PPL && BT_THREADSAFE

// create a non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler()
{
    static btTaskSchedulerSequential sTaskScheduler;
    return &sTaskScheduler;
}

// create an OpenMP task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetOpenMPTaskScheduler()
{
#if BT_USE_OPENMP && BT_THREADSAFE
    static btTaskSchedulerOpenMP sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

// create an Intel TBB task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetTBBTaskScheduler()
{
#if BT_USE_TBB && BT_THREADSAFE
    static btTaskSchedulerTBB sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

// create a PPL task scheduler (if available, otherwise returns null)
btITaskScheduler* btGetPPLTaskScheduler()
{
#if BT_USE_PPL && BT_THREADSAFE
    static btTaskSchedulerPPL sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}
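
// Usage sketch (illustrative only, not part of the original file): picking the
// best available scheduler at startup, falling back to the sequential one.
// Factories return NULL when the corresponding backend was not compiled in.
//
//     btITaskScheduler* scheduler = btGetOpenMPTaskScheduler();
//     if ( !scheduler ) scheduler = btGetTBBTaskScheduler();
//     if ( !scheduler ) scheduler = btGetPPLTaskScheduler();
//     if ( !scheduler ) scheduler = btGetSequentialTaskScheduler();
//     scheduler->setNumThreads( scheduler->getMaxNumThreads() );
//     btSetTaskScheduler( scheduler );  // must be called from the main thread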