1 /**
2  * Threads and thread-pool.
3  *
4  * Copyright: Copyright Sean Kelly 2005 - 2012.
5  * Copyright: Copyright (c) 2009-2011, David Simcha.
6  * Copyright: Copyright Guillaume Piolat 2016.
7  * License: Distributed under the
8  *      $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0).
9  *    (See accompanying file LICENSE)
10  * Authors:   Sean Kelly, Walter Bright, Alex Rønne Petersen, Martin Nowak, David Simcha, Guillaume Piolat
11  */
12 module dplug.core.thread;
13 
14 import core.stdc.stdlib;
15 import core.stdc.stdio;
16 
17 import dplug.core.nogc;
18 import dplug.core.lockedqueue;
19 import dplug.core.sync;
20 
21 version(Posix)
22     import core.sys.posix.pthread;
23 else version(Windows)
24 {
25     import core.stdc.stdint : uintptr_t;
26     import core.sys.windows.windef;
27     import core.sys.windows.winbase;
28     import core.thread;
29 
30     extern (Windows) alias btex_fptr = uint function(void*) ;
31     extern (C) uintptr_t _beginthreadex(void*, uint, btex_fptr, void*, uint, uint*) nothrow @nogc;
32 }
33 else
34     static assert(false, "Platform not supported");
35 
36 version (OSX)
37     version = Darwin;
38 else version (iOS)
39     version = Darwin;
40 else version (TVOS)
41     version = Darwin;
42 else version (WatchOS)
43     version = Darwin;
44 
45 version(Darwin)
46 {
47     extern(C) nothrow @nogc
48     int sysctlbyname(const(char)*, void *, size_t *, void *, size_t);
49 }
50 
51 //debug = threadPoolIsActuallySynchronous;
52 
53 
54 /// Legacy thread function
55 alias ThreadDelegate = void delegate() nothrow @nogc;
56 
57 /// Thread function with user data, used eg. in thread pool.
58 alias ThreadDelegateUser = void delegate(void* userData) nothrow @nogc;
59 
60 
/// Builds a `Thread` running a delegate that takes no argument.
/// The thread does not run until `start` is called on the returned value.
///
/// Params:
///     callback  = The delegate that will be called by the thread.
///     stackSize = The thread stack size in bytes. 0 for default size.
Thread makeThread(ThreadDelegate callback, size_t stackSize = 0) nothrow @nogc
{
    Thread created = Thread(callback, stackSize);
    return created;
}
65 
/// Builds a `Thread` running a delegate that receives a user pointer.
/// The thread does not run until `start` is called on the returned value.
///
/// Params:
///     callback  = The delegate that will be called by the thread.
///     stackSize = The thread stack size in bytes. 0 for default size.
///     userData  = A pointer handed to `callback` when it is invoked.
Thread makeThread(ThreadDelegateUser callback, size_t stackSize = 0, void* userData = null) nothrow @nogc
{
    Thread created = Thread(callback, stackSize, userData);
    return created;
}
70 
/// Optimistic thread, failure not supported: every failure reported by the
/// OS or by the allocator halts the program through `assert(false)`.
struct Thread
{
nothrow:
@nogc:
public:

    /// Create a thread. Thread is not created until `start` has been called.
    ///
    /// Params:
    ///     callback  = The delegate that will be called by the thread.
    ///     stackSize = The thread stack size in bytes. 0 for default size.
    ///
    /// Warning: It is STRONGLY ADVISED to pass a class member delegate (not a struct
    ///          member delegate) to have additional context.
    ///          Passing struct method delegates are currently UNSUPPORTED.
    ///
    this(ThreadDelegate callback, size_t stackSize = 0)
    {
        _stackSize = stackSize;
        _context = allocateContext();
        _context.callback = callback;
        _context.callbackUser = null;
        _context.userData = null; // never read for this kind of callback, but keep the heap block fully defined
    }

    /// Create a thread with user data. Thread is not created until `start` has been called.
    ///
    /// Params:
    ///     callback  = The delegate that will be called by the thread.
    ///     stackSize = The thread stack size in bytes. 0 for default size.
    ///     userData  = a pointer to be passed to thread delegate
    ///
    /// Warning: It is STRONGLY ADVISED to pass a class member delegate (not a struct
    ///          member delegate) to have additional context.
    ///          Passing struct method delegates are currently UNSUPPORTED.
    ///
    this(ThreadDelegateUser callback, size_t stackSize = 0, void* userData = null)
    {
        _stackSize = stackSize;
        _context = allocateContext();
        _context.callback = null;
        _context.callbackUser = callback;
        _context.userData = userData;
    }

    /// Releases the heap-allocated context. This does not join the thread.
    ~this()
    {
        if (_context !is null)
        {
            free(_context);
            _context = null;
        }
    }

    @disable this(this);

    /// Starts the thread. Threads are created suspended. This function can
    /// only be called once.
    void start()
    {
        version(Posix)
        {
            pthread_attr_t attr;

            int err = assumeNothrowNoGC(
                (pthread_attr_t* pattr)
                {
                    return pthread_attr_init(pattr);
                })(&attr);

            if (err != 0)
                assert(false);

            // A stack size of 0 means "keep the default of the platform".
            if(_stackSize != 0)
            {
                int err2 = assumeNothrowNoGC(
                    (pthread_attr_t* pattr, size_t stackSize)
                    {
                        return pthread_attr_setstacksize(pattr, stackSize);
                    })(&attr, _stackSize);
                if (err2 != 0)
                    assert(false);
            }

            // The entry point receives the heap context, not `this`.
            int err3 = pthread_create(&_id, &attr, &posixThreadEntryPoint, _context);
            if (err3 != 0)
                assert(false);

            int err4 = assumeNothrowNoGC(
                (pthread_attr_t* pattr)
                {
                    return pthread_attr_destroy(pattr);
                })(&attr);
            if (err4 != 0)
                assert(false);
        }
        else version(Windows)
        {

            uint dummy;

            // Created suspended, then resumed right below.
            _id = cast(HANDLE) _beginthreadex(null,
                                              cast(uint)_stackSize,
                                              &windowsThreadEntryPoint,
                                              _context,
                                              CREATE_SUSPENDED,
                                              &dummy);
            if (cast(size_t)_id == 0)
                assert(false);
            if (ResumeThread(_id) == -1)
                assert(false);
        }
        else
            static assert(false);
    }

    /// Wait for that thread termination
    /// Again, this function can be called only once.
    /// This actually releases the thread resource.
    void join()
    {
        version(Posix)
        {
            void* returnValue;
            if (0 != pthread_join(_id, &returnValue))
                assert(false);
        }
        else version(Windows)
        {
            if(WaitForSingleObject(_id, INFINITE) != WAIT_OBJECT_0)
                assert(false);
            CloseHandle(_id);
        }
    }

    /// Returns: the OS thread identifier stored by `start`, cast to `void*`.
    void* getThreadID()
    {
        version(Posix) return cast(void*)_id;
        else version(Windows) return cast(void*)_id;
        else assert(false);
    }

private:
    version(Posix) 
    {
        pthread_t _id;
    }
    else version(Windows) 
    {
        HANDLE _id;
    }
    else 
        static assert(false);

    // Thread context given to OS thread creation function need to have a constant address
    // since there are no guarantees the `Thread` struct will be at the same address.
    static struct CreateContext
    {
    nothrow:
    @nogc:
        ThreadDelegate callback;
        ThreadDelegateUser callbackUser;
        void* userData;

        // Calls whichever of the two delegates was provided at construction.
        void call()
        {
            if (callback !is null)
                callback();
            else
                callbackUser(userData);
        }
    }
    CreateContext* _context;

    size_t _stackSize;

    // Allocates the context with `malloc`. Like every other failure in this
    // struct, an allocation failure is not recoverable and halts.
    static CreateContext* allocateContext()
    {
        CreateContext* context = cast(CreateContext*) malloc(CreateContext.sizeof);
        if (context is null)
            assert(false);
        return context;
    }
}
237 
version(Posix)
{
    /// Entry point given to `pthread_create`.
    /// `threadContext` is the `Thread.CreateContext*` passed at thread creation;
    /// its delegate is called, and the thread always returns `null`.
    extern(C) void* posixThreadEntryPoint(void* threadContext) nothrow @nogc
    {
        auto createContext = cast(Thread.CreateContext*) threadContext;
        createContext.call();
        return null;
    }
}
247 
version(Windows)
{
    /// Entry point given to `_beginthreadex`.
    /// `threadContext` is the `Thread.CreateContext*` passed at thread creation;
    /// its delegate is called, and the thread always exits with code 0.
    extern (Windows) uint windowsThreadEntryPoint(void* threadContext) nothrow @nogc
    {
        auto createContext = cast(Thread.CreateContext*) threadContext;
        createContext.call();
        return 0;
    }
}
257 
// Runs a class member delegate in a thread, and checks that the delegate
// reaches both the enclosing unittest frame and the fields of its own object.
unittest
{
    int writtenFromThread = 0;

    class Runner
    {
    nothrow @nogc:
        this()
        {
            worker = makeThread(&run);
            worker.start();
        }

        void join()
        {
            worker.join();
        }

        void run()
        {
            writtenFromThread = 1;
            memberValue = 2;

            // the delegate context must point to this very object
            assert(sentinel0 == 0x11223344);
            assert(sentinel1 == 0x55667788);
        }

        int sentinel0 = 0x11223344;
        int sentinel1 = 0x55667788;
        int memberValue = 0;
        Thread worker;
    }

    auto runner = new Runner;
    runner.join();
    assert(runner.memberValue == 2);
    runner.destroy();
    assert(writtenFromThread == 1);
}
298 
/// Creates a thread for `dg` and starts it right away.
/// The thread is returned so that the caller can call `.join()` on it.
///
/// Params:
///     dg        = The delegate that will be called by the thread.
///     stackSize = The thread stack size in bytes. 0 for default size.
Thread launchInAThread(ThreadDelegate dg, size_t stackSize = 0) nothrow @nogc
{
    Thread launched = Thread(dg, stackSize);
    launched.start();
    return launched;
}
307 
308 //
309 // Thread-pool
310 //
311 
/// Returns: Number of CPUs as reported by the OS, never less than 1.
///          If the OS query fails or reports a non-positive count, 1 is returned
///          so that callers can always size a thread pool from the result.
int getTotalNumberOfCPUs() nothrow @nogc
{
    int procs;

    version(Windows)
    {
        import core.sys.windows.windows : SYSTEM_INFO, GetSystemInfo;
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        procs = cast(int) si.dwNumberOfProcessors;
    }
    else version(Darwin)
    {
        auto nameStr = "machdep.cpu.core_count\0".ptr;
        uint ans = 0;
        size_t len = uint.sizeof;
        // Do not trust the output buffer when the call reports a failure.
        if (sysctlbyname(nameStr, &ans, &len, null, 0) != 0)
            ans = 0;
        procs = cast(int) ans;
    }
    else version(Posix)
    {
        import core.sys.posix.unistd : _SC_NPROCESSORS_ONLN, sysconf;
        // sysconf returns -1 on error, which the clamp below turns into 1.
        procs = cast(int) sysconf(_SC_NPROCESSORS_ONLN);
    }
    else
        static assert(false, "OS unsupported");

    // Same lower bound on every platform.
    if (procs < 1)
        procs = 1;
    return procs;
}
341 
342 alias ThreadPoolDelegate = void delegate(int workItem, int threadIndex) nothrow @nogc;
343 
344 
345 debug(threadPoolIsActuallySynchronous)
346 {
347     /// Fake synchronous version of the thread pool
348     /// For measurement purpose, makes it easier to measure actual CPU time spent.
349     class ThreadPool
350     {
351     public:
352     nothrow:
353     @nogc:
354 
355         enum constantThreadId = 0;
356 
357         this(int numThreads = 0, int maxThreads = 0, size_t stackSize = 0)
358         {
359         }
360 
361         ~this()
362         {
363         }
364 
365         void parallelFor(int count, scope ThreadPoolDelegate dg)
366         {
367             foreach(i; 0..count)
368                 dg(cast(int)i, constantThreadId);
369         }
370 
371         void parallelForAsync(int count, scope ThreadPoolDelegate dg)
372         {
373             foreach(i; 0..count)
374                 dg(cast(int)i, constantThreadId);
375         }
376 
377         /// Wait for completion of the previous parallelFor, if any.
378         // It's always safe to call this function before doing another parallelFor.
379         void waitForCompletion()
380         {
381         }
382 
383         int numThreads() pure const
384         {
385             return 1;
386         }
387     }
388 }
389 else
390 {
391 
392     /// Rewrite of the ThreadPool using condition variables.
393     /// FUTURE: this could be speed-up by using futures. Description of the task
394     ///         and associated condition+mutex would go in an external struct.
395     /// Note: the interface of the thread-pool itself is not thread-safe, you cannot give orders from
396     ///       multiple threads at once.
397     class ThreadPool
398     {
399     public:
400     nothrow:
401     @nogc:
402 
403         /// Creates a thread-pool.
404         /// Params:
405         ///     numThreads Number of threads to create (0 = auto).
406         ///     maxThreads A maximum number of threads to create (0 = none).
407         ///     stackSize Stack size to create threads with (0 = auto).
408         this(int numThreads = 0, int maxThreads = 0, size_t stackSize = 0)
409         {
410             // Create sync first
411             _workMutex = makeMutex();
412             _workCondition = makeConditionVariable();
413 
414             _finishMutex = makeMutex();
415             _finishCondition = makeConditionVariable();
416 
417             // Create threads
418             if (numThreads == 0)
419                 numThreads = getTotalNumberOfCPUs();
420 
421             // Limit number of threads eventually (this is done to give other software some leeway
422             // in a soft real-time OS)
423             if (maxThreads != 0)
424             {
425                 if (numThreads > maxThreads)
426                     numThreads = maxThreads;
427             }
428 
429             assert(numThreads >= 1);
430 
431             _threads = mallocSlice!Thread(numThreads);
432             foreach(size_t threadIndex, ref thread; _threads)
433             {
434                 // Pass the index of the thread through user data, so that it can be passed to the task in 
435                 // case these task need thread-local buffers.
436                 void* userData = cast(void*)(threadIndex);
437                 thread = makeThread(&workerThreadFunc, stackSize, userData);
438             }
439 
440             // because of calling currentThreadId, don't start threads until all are created
441             foreach(ref thread; _threads)
442             {
443                 thread.start();
444             }
445         }
446 
447         /// Destroys a thread-pool.
448         ~this()
449         {
450             if (_threads !is null)
451             {
452                 assert(_state == State.initial);
453 
454                 // Put the threadpool is stop state
455                 _workMutex.lock();
456                     _stopFlag = true;
457                 _workMutex.unlock();
458 
459                 // Notify all workers
460                 _workCondition.notifyAll();
461 
462                 // Wait for each thread termination
463                 foreach(ref thread; _threads)
464                     thread.join();
465 
466                 // Detroys each thread
467                 foreach(ref thread; _threads)
468                     thread.destroy();
469                 freeSlice(_threads);
470                 _threads = null;
471                 destroy(_workMutex);
472             }
473         }
474 
475         /// Calls the delegate in parallel, with 0..count as index.
476         /// Immediate waiting for completion.
477         /// If there is only one task, it is run directly on this thread.
478         /// IMPORTANT to be reentrant there! widget drawn alone can then launch same threadpool.
479         void parallelFor(int count, scope ThreadPoolDelegate dg)
480         {
481             assert(_state == State.initial);
482 
483             // Do not launch worker threads for one work-item, not worth it.
484             // (but it is worth it in async).
485             if (count == 1)
486             {
487                 int dummythreadID = 0; // it should not matter which is passed as long as it's a valid ID.
488                 dg(0, dummythreadID);
489                 return;
490             }
491 
492             // Unleash parallel threads.
493             parallelForAsync(count, dg);
494 
495             // Wait for completion immediately.
496             waitForCompletion(); 
497         }
498 
499         /// Same, but does not wait for completion. 
500         /// You cannot have 2 concurrent parallelFor for the same thread-pool.
501         void parallelForAsync(int count, scope ThreadPoolDelegate dg)
502         {
503             assert(_state == State.initial);
504 
505             if (count == 0) // no tasks, exit immediately
506                 return;
507 
508             // At this point we assume all worker threads are waiting for messages
509 
510             // Sets the current task
511             _workMutex.lock();
512 
513             _taskDelegate = dg;       // immutable during this parallelFor
514             _taskNumWorkItem = count; // immutable during this parallelFor
515             _taskCurrentWorkItem = 0;
516             _taskCompleted = 0;
517 
518             _workMutex.unlock();
519 
520             if (count >= _threads.length)
521             {
522                 // wake up all threads
523                 // FUTURE: if number of tasks < number of threads only wake up the necessary amount of threads
524                 _workCondition.notifyAll();
525             }
526             else
527             {
528                 // Less tasks than threads in the pool: only wake-up some threads.
529                 for (int t = 0; t < count; ++t)
530                     _workCondition.notifyOne();
531             }
532 
533             _state = State.parallelForInProgress;
534         }
535 
536         /// Wait for completion of the previous parallelFor, if any.
537         // It's always safe to call this function before doing another parallelFor.
538         void waitForCompletion()
539         {
540             if (_state == State.initial)
541                 return; // that means that parallel threads were not launched
542 
543             assert(_state == State.parallelForInProgress);
544 
545             _finishMutex.lock();
546             scope(exit) _finishMutex.unlock();
547 
548             // FUTURE: order thread will be waken up multiple times
549             //         (one for every completed task)
550             //         maybe that can be optimized
551             while (_taskCompleted < _taskNumWorkItem)
552             {
553                 _finishCondition.wait(&_finishMutex);
554             }
555 
556             _state = State.initial;
557         }
558 
559         int numThreads() pure const
560         {
561             return cast(int)_threads.length;
562         }
563 
564     private:
565         Thread[] _threads = null;
566 
567         // A map to find back thread index from thread system ID
568         void*[] _threadID = null;
569 
570         // Used to signal more work
571         UncheckedMutex _workMutex;
572         ConditionVariable _workCondition;
573 
574         // Used to signal completion
575         UncheckedMutex _finishMutex;
576         ConditionVariable _finishCondition;
577 
578         // These fields represent the current task group (ie. a parallelFor)
579         ThreadPoolDelegate _taskDelegate;
580         int _taskNumWorkItem;     // total number of tasks in this task group
581         int _taskCurrentWorkItem; // current task still left to do (protected by _workMutex)
582         int _taskCompleted;       // every task < taskCompleted has already been completed (protected by _finishMutex)
583 
584         bool _stopFlag;
585 
586         bool hasWork()
587         {
588             return _taskCurrentWorkItem < _taskNumWorkItem;
589         }
590 
591         // Represent the thread-pool state from the user POV
592         enum State
593         {
594             initial,               // tasks can be launched
595             parallelForInProgress, // task were launched, but not waited one
596         }
597         State _state = State.initial;
598 
599         // What worker threads do
600         // MAYDO: threads come here with bad context with struct delegates
601         void workerThreadFunc(void* userData)
602         {
603             while (true)
604             {
605                 int workItem = -1;
606                 {
607                     _workMutex.lock();
608                     scope(exit) _workMutex.unlock();
609 
610                     // Wait for notification
611                     while (!_stopFlag && !hasWork())
612                         _workCondition.wait(&_workMutex);
613 
614                     if (_stopFlag && !hasWork())
615                         return;
616 
617                     assert(hasWork());
618 
619                     // Pick a task and increment counter
620                     workItem = _taskCurrentWorkItem;
621                     _taskCurrentWorkItem++;
622                 }
623 
624                 // Find thread index from user data set by pool
625                 int threadIndex = cast(int)( cast(size_t)(userData) );
626 
627                 // Do the actual task
628                 _taskDelegate(workItem, threadIndex);
629 
630                 // signal completion of one more task
631                 {
632                     _finishMutex.lock();
633                     _taskCompleted++;
634                     _finishMutex.unlock();
635 
636                     _finishCondition.notifyOne(); // wake-up
637                 }
638             }
639         }
640     }
641 }
642 
/// Get an OS-provided value identifying the calling thread
/// (`GetCurrentThreadId` on Windows, `pthread_self` on Posix).
/// The returned ID is just used for display. You can't get a `Thread` out of it.
public static size_t getCurrentThreadId() nothrow @nogc
{
    version(Windows)
    {
        auto osThreadId = GetCurrentThreadId();
        return cast(size_t) osThreadId;
    }
    else version(Posix)
    {
        void* self = cast(void*) pthread_self();
        return cast(size_t) self;
    }
    else
        static assert(false);
}
658 
// Exercises the thread pool with several thread counts, with both the
// blocking and the asynchronous parallelFor, including 0 and 1 work items.
unittest
{
    import core.atomic;
    import dplug.core.nogc;

    struct A
    {
        ThreadPool _pool;
        int _numThreads;

        this(int numThreads, int maxThreads = 0, int stackSize = 0)
        {
            _pool = mallocNew!ThreadPool(numThreads, maxThreads, stackSize);
            _numThreads = _pool.numThreads();
        }

        ~this()
        {
            // The pool was obtained with `mallocNew`: run its destructor AND
            // give the memory back, a plain `destroy` would leak the allocation.
            _pool.destroyFree();
        }

        void launch(int count, bool async) nothrow @nogc
        {
            if (async)
            {
                _pool.parallelForAsync(count, &loopBody);
                _pool.waitForCompletion();
            }
            else
                _pool.parallelFor(count, &loopBody);
        }

        // Called once per work item, possibly from several threads at once.
        void loopBody(int workItem, int threadIndex) nothrow @nogc
        {
            bool goodIndex = (threadIndex >= 0) && (threadIndex < _numThreads);
            assert(goodIndex);
            atomicOp!"+="(counter, 1);
        }

        shared(int) counter = 0;
    }

    foreach (numThreads;  [0, 1, 2, 4, 8, 16, 32])
    {
        auto a = A(numThreads);
        a.launch(10, false);
        assert(a.counter == 10);

        a.launch(500, true);
        assert(a.counter == 510);

        a.launch(1, false);
        assert(a.counter == 511);

        a.launch(1, true);
        assert(a.counter == 512);

        // Zero work items must be a no-op in both modes.
        a.launch(0, true);
        assert(a.counter == 512);
        a.launch(0, false);
        assert(a.counter == 512);
    }
}
722 
723 // Bonus: Capacity to get the macOS version
724 
725 version(Darwin)
726 {
727 
728     // Note: .init value is a large future version (100.0.0), so that failure to detect version
729     // lead to newer behaviour.
730     struct MacOSVersion
731     {
732         int major = 100; // eg: major = 10   minor = 7 for 10.7
733         int minor = 0;
734         int patch = 0;
735     }
736 
737     /// Get the macOS version we are running on.
738     /// Note: it only makes sense for macOS, not iOS.
739     /// Note: patch always return zero for now.
740     MacOSVersion getMacOSVersion() nothrow @nogc
741     {
742         char[256] str;
743         size_t size = 256;
744         int ret = sysctlbyname("kern.osrelease", str.ptr, &size, null, 0);
745         MacOSVersion result;
746         if (ret != 0) 
747             return result;
748         int darwinMajor, darwinMinor, darwinPatch;
749         if (3 == sscanf(str.ptr, "%d.%d.%d", &darwinMajor, &darwinMinor, &darwinPatch))
750         {
751             result.patch = 0;
752 
753             switch(darwinMajor)
754             {
755                 case 0: .. case 11:
756                     result.major = 10; // 10.7
757                     result.minor = 7;
758                     break;
759 
760                 case 12: .. case 19:
761                     result.major = 10; // 10.7
762                     result.minor = darwinMajor - 4; // 10.8 to 10.15
763                     break;
764 
765                 case 20:
766                     result.major = 11; // Big Sur
767                     result.minor = 0;
768                     break;
769 
770                 case 21:
771                     result.major = 12; // Monterey
772                     result.minor = 0;
773                     break;
774 
775 
776                 default:
777                     result.major = 100;
778                     result.minor = 0;
779             }
780         }
781         return result;
782     }
783 
784   /*  unittest
785     {
786         MacOSVersion ver = getMacOSVersion();
787         printf("Detected macOS %d.%d.%d\n", ver.major, ver.minor, ver.patch);
788     } */
789 }