Tcl package Thread source code

View Ticket
Login
Bounty program for improvements to Tcl and certain Tcl packages.
Ticket UUID: a57b6c3fd438ec070faf8ec2524cebdd079d6245
Title: -eventmark + thread::send -async == deadlock
Type: Bug Version: 2.7.2
Submitter: pooryorick Created on: 2015-04-12 20:02:17
Subsystem: 80. Thread Package Assigned To: jan.nijtmans
Priority: 5 Medium Severity: Important
Status: Closed Last Modified: 2015-09-30 11:33:22
Resolution: Fixed Closed By: jan.nijtmans
    Closed on: 2015-09-30 11:33:22
Description:

thread::configure $threadid -eventmark ... limits the number of asynchronous calls to the thread. If thread A and thread B are configured with an -eventmark, and A uses the variant of thread::send -async that specifies a variable in which to store the result of the call, a deadlock can occcur if A's -eventmark limit is reached in the meantime. Apparently, the response from B to A that stores the result in the specified variable is subject to the queue limit of A. Because asynchronous calls become blocking when a thread's queue limit is reached, A ends up blocked in an -async call to B, which in turn is blocked waiting for A's queue to die down so that it can write its response.

I think this is a bug because with -eventmark, the intent is normally to throttle incoming calls, not to limit responses to calls to other threads, and it can be a real head-scratcher when such a deadlock occurs in a non-trivial threaded program. Even when one is aware of this behaviour, in many cases it significantly diminishes the utility of -eventmark and complicates the design of threaded programs.

Here's a script that illustrates the issue:

#! /bin/env tclsh

package require Thread

set consumer [thread::create {
    proc eat value {
        puts [list [thread::id] receive $value]
    }
    thread::wait
}]
thread::configure $consumer -eventmark 10 

set filter [thread::create {
    proc eat value {
        variable consumer
                
                # This command causes the threads to hang.
        thread::send -async $consumer [list eat [expr {$value * 2}]] [namespace current]::reply

                # Replace the previous command with this one, and the deadlock resolves.
        #thread::send -async $consumer [list eat [expr {$value * 2}]] 
    }
    thread::wait
}]
thread::send $filter [list variable consumer $consumer]
thread::configure $filter -eventmark 10 

for {set i 0} {$i < 50} {incr i} {
    set thread [thread::create {
        proc go {} {
            variable filter
            set value 1
            while 1 {
                incr value 2
                #puts [list sending $value to $filter]
                thread::send -async $filter [list eat $value]
            }
        }
        thread::wait
    }]
    thread::send -async $thread [list variable filter $filter]
    thread::send -async $thread {after 0 [list after idle go]}
}

vwait forever

One fix would be to exclude -async callbacks from the queue limit:

--- generic/threadCmd.c
+++ generic/threadCmd.c
@@ -265,10 +265,11 @@
  * Definition of flags for ThreadSend.
  */

 #define THREAD_SEND_WAIT 1<<1
 #define THREAD_SEND_HEAD 1<<2
+#define THREAD_SEND_CLBK 1<<4

 #ifdef BUILD_thread
 # undef  TCL_STORAGE_CLASS
 # define TCL_STORAGE_CLASS DLLEXPORT
 #endif
@@ -2745,15 +2746,17 @@
     Tcl_ThreadAlert(thrId);

     if ((flags & THREAD_SEND_WAIT) == 0) {
         /*
          * Might potentially spend some time here, until the
-         * worker thread clean's up it's queue a little bit.
+         * worker thread cleans up its queue a little bit.
          */
-        while (tsdPtr->maxEventsCount &&
-               tsdPtr->eventsPending > tsdPtr->maxEventsCount) {
-            Tcl_ConditionWait(&tsdPtr->doOneEvent, &threadMutex, NULL);
+        if ((flags & THREAD_SEND_CLBK) == 0) {
+            while (tsdPtr->maxEventsCount &&
+                   tsdPtr->eventsPending > tsdPtr->maxEventsCount) {
+                Tcl_ConditionWait(&tsdPtr->doOneEvent, &threadMutex, NULL);
+            }
         }
         Tcl_MutexUnlock(&threadMutex);
         return TCL_OK;
     }

@@ -3094,11 +3097,11 @@
     if (interp != NULL) {
         Tcl_Preserve((ClientData)interp);

         if (clbkPtr && clbkPtr->threadId == thrId) {
             Tcl_Release((ClientData)interp);
-            /* Watch: this thread evaluates it's own callback. */
+            /* Watch: this thread evaluates its own callback. */
             interp = clbkPtr->interp;
             Tcl_Preserve((ClientData)interp);
         }

         Tcl_ResetResult(interp);
@@ -3157,11 +3160,11 @@
         if (code != TCL_OK) {
             ThreadErrorProc(interp);
         }

         ThreadSetResult(interp, code, &clbkPtr->result);
-        ThreadSend(interp, clbkPtr->threadId, tmpPtr, NULL, 0);
+        ThreadSend(interp, clbkPtr->threadId, tmpPtr, NULL, THREAD_SEND_CLBK);

     } else if (code != TCL_OK) {
         /*
          * Only pass errors onto the registered error handler
          * when we don't have a result target for this event.

See also http://wiki.tcl.tk/41344

User Comments: jan.nijtmans added on 2015-09-30 11:33:22:
Looks good to me too, so merged to trunk

dgp added on 2015-05-01 17:18:45:
Checked in as branch bug-a57b6c3fd4 .  Looks good to me,
but another set of eyes before merge would be good too.