@@ -94,12 +94,63 @@ pub mod compiled {
9494
9595 pub unsafe fn cleanup ( ) { }
9696
97+ // Rationale for all of these functions being inline(never)
98+ //
99+ // The #[thread_local] annotation gets propagated all the way through to
100+ // LLVM, meaning the global is specially treated by LLVM to lower it to an
101+ // efficient sequence of instructions. This also involves dealing with fun
102+ // stuff in object files and whatnot. Regardless, it turns out this causes
103+ // trouble with green threads and lots of optimizations turned on. The
104+ // following case study was done on linux x86_64, but I would imagine that
105+ // other platforms are similar.
106+ //
107+ // On linux, the instruction sequence for loading the tls pointer global
108+ // looks like:
109+ //
110+ // mov %fs:0x0, %rax
111+ // mov -0x8(%rax), %rbx
112+ //
113+ // This code leads me to believe that (%fs:0x0) is a table, and then the
114+ // table contains the TLS values for the process. Hence, the slot at offset
115+ // -0x8 is the task TLS pointer. This leads us to the conclusion that this
116+ // table is the actual thread local part of each thread. The kernel sets up
117+ // the fs segment selector to point at the right region of memory for each
118+ // thread.
119+ //
120+ // Optimizations lead me to believe that this code is lowered to these
121+ // instructions in the LLVM codegen passes, because you'll see code like
122+ // this when everything is optimized:
123+ //
124+ // mov %fs:0x0, %r14
125+ // mov -0x8(%r14), %rbx
126+ // // do something with %rbx, the rust Task pointer
127+ //
128+ // ... // <- do more things
129+ //
130+ // mov -0x8(%r14), %rbx
131+ // // do something else with %rbx
132+ //
133+ // Note that the optimization done here is that the first load is not
134+ // repeated for the later instructions. This means that the %fs:0x0
135+ // memory location is only dereferenced once.
136+ //
137+ // Normally, this is actually a good thing! With green threads, however,
138+ // it's very possible for the code labeled "do more things" to context
139+ // switch to another thread. If this happens, then we *must* re-load %fs:0x0
140+ // because it's changed (we're on a different thread). If we don't re-load
141+ // the table location, then we'll be reading the original thread's TLS
142+ // values, not our thread's TLS values.
143+ //
144+ // Hence, we never inline these functions. By never inlining, we're
145+ // guaranteed that loading the table is a local decision which is forced to
146+ // *always* happen.
147+
97148 /// Give a pointer to thread-local storage: the owned `sched` box is transmuted and stored in `RT_TLS_PTR`, overwriting the slot's previous value.
98149 ///
99150 /// # Safety note
100151 ///
101152 /// Does not validate the pointer type; the value is stored via `cast::transmute` with no record of `T`.
102- #[ inline]
153+ #[ inline( never ) ] // see the inline(never) rationale comment above
103154 pub unsafe fn put < T > ( sched : ~T ) {
104155 RT_TLS_PTR = cast:: transmute ( sched)
105156 }
@@ -109,7 +160,7 @@ pub mod compiled {
109160 /// # Safety note
110161 ///
111162 /// Does not validate the pointer type.
112- #[ inline]
163+ #[ inline( never ) ] // see comments above
113164 pub unsafe fn take < T > ( ) -> ~T {
114165 let ptr = RT_TLS_PTR ;
115166 rtassert ! ( !ptr. is_null( ) ) ;
@@ -124,7 +175,7 @@ pub mod compiled {
124175 /// # Safety note
125176 ///
126177 /// Does not validate the pointer type.
127- #[ inline]
178+ #[ inline( never ) ] // see comments above
128179 pub unsafe fn try_take < T > ( ) -> Option < ~T > {
129180 let ptr = RT_TLS_PTR ;
130181 if ptr. is_null ( ) {
@@ -143,25 +194,28 @@ pub mod compiled {
143194 ///
144195 /// Does not validate the pointer type; `RT_TLS_PTR` is transmuted straight to `~T`.
145196 /// Leaves the old pointer in TLS for speed: the slot is only read, never cleared, so the returned box aliases the pointer still stored there.
146- #[ inline]
197+ #[ inline( never ) ] // see the inline(never) rationale comment above
147198 pub unsafe fn unsafe_take < T > ( ) -> ~T {
148199 cast:: transmute ( RT_TLS_PTR )
149200 }
150201
151202 /// Check whether there is a thread-local pointer installed, i.e. whether `RT_TLS_PTR` is currently non-null.
203+ #[ inline( never) ] // see the inline(never) rationale comment above
152204 pub fn exists ( ) -> bool {
153205 unsafe {
154206 RT_TLS_PTR . is_not_null ( )
155207 }
156208 }
157209
210+ #[ inline( never) ] // see the inline(never) rationale comment above; returns RT_TLS_PTR cast to *mut T
158211 pub unsafe fn unsafe_borrow < T > ( ) -> * mut T {
159212 if RT_TLS_PTR . is_null ( ) { // a null slot means no pointer has been installed
160213 rtabort ! ( "thread-local pointer is null. bogus!" ) ; // NOTE(review): presumably aborts the runtime and does not return -- confirm
161214 }
162215 RT_TLS_PTR as * mut T // the slot is left untouched; the pointer type is not validated
163216 }
164217
218+ #[ inline( never) ] // see comments above
165219 pub unsafe fn try_unsafe_borrow < T > ( ) -> Option < * mut T > {
166220 if RT_TLS_PTR . is_null ( ) {
167221 None
0 commit comments