Fix two race conditions and ref counting issue when joining a bridge
authorMatthew Jordan <mjordan@digium.com>
Mon, 12 Aug 2013 15:59:19 +0000 (15:59 +0000)
committerMatthew Jordan <mjordan@digium.com>
Mon, 12 Aug 2013 15:59:19 +0000 (15:59 +0000)
These problems were all caught by a test in the Asterisk Test Suite that
originated some Local channels and attempted to move the ;2 half of the Local
channel into a bridge using the Bridge AMI action.

(1) When originating a channel, the Newchannel event is emitted quickly;
    however, the ;2 channel will not have a pbx thread assigned to it until
    after the outbound 'dialing' for the ;1 is complete. Thus, there is a period
    of time where the outside world "knows" of the channel's existence and can
    influence it but Asterisk has not yet started the dialplan execution thread.
    If a Bridge AMI action is taken on the channel, the channel appears to be a
    Dialed channel with no PBX thread; hence, the channel will be imparted into
    the Bridge by first 'yanking' the channel. At the same time, a race condition
    can occur after the yank (but before entering the bridge) when ;1 answers
    and starts a PBX on the ;2. The end result currently is an assertion failure
    in the Bridging API, as a channel with a PBX is imparted into the Bridge.

    There's no way to prevent AMI from attempting to Bridge a channel
    immediately after creation; likewise, holding the channel lock through the
    entire Dial operation is unwise (and impossible). Instead of treating the
    presence of a PBX thread as an error, we simply bail out of the adding the
    channel to the bridge through ast_bridge_impart. The Bridge action will
    then fail - but we avoid a situation where the channel is both executing
    a PBX thread and simultaneously being given a separate thread in the
    bridging system (which would be a "bad thing"). Since imparting a channel
    with a PBX *can* occur and is not a programming error, the asserts have been
    removed.

(2) When the first condition occurs, we have to take one of two actions: either
    hangup the yanked channel as it did not enter the bridge, or deref it
    because we don't own it. We can determine if we own it or not by testing
    for the presence of the PBX thread. If we hung it up directly, we'd crash.

(3) bridge_find_channel does not increase the reference count of the
    ast_bridge_channel object. The RAII_VAR usage in ast_bridge_add_channel
    thus created a ticking time bomb in whatever bridge the channel moved into,
    as the destructor for the ast_bridge_channel object would be called.

Review: https://reviewboard.asterisk.org/r/2741/

git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@396543 65c4cc65-6c06-0410-ace0-fbb531ad65f3

main/bridge.c
main/bridge_channel.c
main/features.c

index b8f69fe..bc983b1 100644 (file)
@@ -1439,7 +1439,7 @@ int ast_bridge_join(struct ast_bridge *bridge,
        int pass_reference)
 {
        struct ast_bridge_channel *bridge_channel;
-       int res;
+       int res = 0;
 
        bridge_channel = bridge_channel_internal_alloc(bridge);
        if (pass_reference) {
@@ -1460,16 +1460,21 @@ int ast_bridge_join(struct ast_bridge *bridge,
                bridge_channel->tech_args = *tech_args;
        }
 
-       /* Initialize various other elements of the bridge channel structure that we can't do above */
        ast_channel_lock(chan);
-       ast_channel_internal_bridge_channel_set(chan, bridge_channel);
+       if (ast_test_flag(ast_channel_flags(chan), AST_FLAG_ZOMBIE)) {
+               res = -1;
+       } else {
+               ast_channel_internal_bridge_channel_set(chan, bridge_channel);
+       }
        ast_channel_unlock(chan);
        bridge_channel->thread = pthread_self();
        bridge_channel->chan = chan;
        bridge_channel->swap = swap;
        bridge_channel->features = features;
 
-       res = bridge_channel_internal_join(bridge_channel);
+       if (!res) {
+               res = bridge_channel_internal_join(bridge_channel);
+       }
 
        /* Cleanup all the data in the bridge channel after it leaves the bridge. */
        ast_channel_lock(chan);
@@ -1546,9 +1551,16 @@ static void *bridge_channel_ind_thread(void *data)
 
 int ast_bridge_impart(struct ast_bridge *bridge, struct ast_channel *chan, struct ast_channel *swap, struct ast_bridge_features *features, int independent)
 {
-       int res;
+       int res = 0;
        struct ast_bridge_channel *bridge_channel;
 
+       /* Imparted channels cannot have a PBX. */
+       if (ast_channel_pbx(chan)) {
+               ast_log(AST_LOG_WARNING, "Channel %s has a PBX thread and cannot be imparted into bridge %s\n",
+                       ast_channel_name(chan), bridge->uniqueid);
+               return -1;
+       }
+
        /* Supply an empty features structure if the caller did not. */
        if (!features) {
                features = ast_bridge_features_new();
@@ -1564,9 +1576,14 @@ int ast_bridge_impart(struct ast_bridge *bridge, struct ast_channel *chan, struc
                return -1;
        }
 
-       /* Setup various parameters */
        ast_channel_lock(chan);
-       ast_channel_internal_bridge_channel_set(chan, bridge_channel);
+       if (ast_test_flag(ast_channel_flags(chan), AST_FLAG_ZOMBIE)) {
+               ast_log(AST_LOG_NOTICE, "Channel %s is a zombie and cannot be imparted into bridge %s\n",
+                       ast_channel_name(chan), bridge->uniqueid);
+               res = -1;
+       } else {
+               ast_channel_internal_bridge_channel_set(chan, bridge_channel);
+       }
        ast_channel_unlock(chan);
        bridge_channel->chan = chan;
        bridge_channel->swap = swap;
@@ -1575,18 +1592,14 @@ int ast_bridge_impart(struct ast_bridge *bridge, struct ast_channel *chan, struc
        bridge_channel->callid = ast_read_threadstorage_callid();
 
        /* Actually create the thread that will handle the channel */
-       if (independent) {
-               /* Independently imparted channels cannot have a PBX. */
-               ast_assert(!ast_channel_pbx(chan));
-
-               res = ast_pthread_create_detached(&bridge_channel->thread, NULL,
-                       bridge_channel_ind_thread, bridge_channel);
-       } else {
-               /* Imparted channels to be departed should not have a PBX either. */
-               ast_assert(!ast_channel_pbx(chan));
-
-               res = ast_pthread_create(&bridge_channel->thread, NULL,
-                       bridge_channel_depart_thread, bridge_channel);
+       if (!res) {
+               if (independent) {
+                       res = ast_pthread_create_detached(&bridge_channel->thread, NULL,
+                               bridge_channel_ind_thread, bridge_channel);
+               } else {
+                       res = ast_pthread_create(&bridge_channel->thread, NULL,
+                               bridge_channel_depart_thread, bridge_channel);
+               }
        }
 
        if (res) {
@@ -2105,10 +2118,11 @@ int ast_bridge_add_channel(struct ast_bridge *bridge, struct ast_channel *chan,
        ast_channel_unlock(chan);
 
        if (chan_bridge) {
-               RAII_VAR(struct ast_bridge_channel *, bridge_channel, NULL, ao2_cleanup);
+               struct ast_bridge_channel *bridge_channel;
 
                ast_bridge_lock_both(bridge, chan_bridge);
                bridge_channel = bridge_find_channel(chan_bridge, chan);
+
                if (bridge_move_locked(bridge, chan_bridge, chan, NULL, 1)) {
                        ast_bridge_unlock(chan_bridge);
                        ast_bridge_unlock(bridge);
@@ -2145,8 +2159,17 @@ int ast_bridge_add_channel(struct ast_bridge *bridge, struct ast_channel *chan,
                }
                ast_channel_ref(yanked_chan);
                if (ast_bridge_impart(bridge, yanked_chan, NULL, features, 1)) {
-                       ast_log(LOG_WARNING, "Could not add %s to the bridge\n", ast_channel_name(chan));
-                       ast_hangup(yanked_chan);
+                       /* It is possible for us to yank a channel and have some other
+                        * thread start a PBX on the channl after we yanked it. In particular,
+                        * this can theoretically happen on the ;2 of a Local channel if we
+                        * yank it prior to the ;1 being answered. Make sure that it isn't
+                        * executing a PBX before hanging it up.
+                        */
+                       if (ast_channel_pbx(yanked_chan)) {
+                               ast_channel_unref(yanked_chan);
+                       } else {
+                               ast_hangup(yanked_chan);
+                       }
                        return -1;
                }
        }
index 830abb7..5893370 100644 (file)
@@ -1827,22 +1827,30 @@ int bridge_channel_internal_join(struct ast_bridge_channel *bridge_channel)
                bridge_channel, ast_channel_name(bridge_channel->chan));
 
        /*
-        * Get "in the bridge" before pushing the channel for any
-        * masquerades on the channel to happen before bridging.
+        * Directly locking the bridge is safe here because nobody else
+        * knows about this bridge_channel yet.
+        */
+       ast_bridge_lock(bridge_channel->bridge);
+
+       /* Make sure we're still good to be put into a bridge
         */
        ast_channel_lock(bridge_channel->chan);
+       if (ast_channel_internal_bridge(bridge_channel->chan)
+               || ast_test_flag(ast_channel_flags(bridge_channel->chan), AST_FLAG_ZOMBIE)) {
+               ast_channel_unlock(bridge_channel->chan);
+               ast_bridge_unlock(bridge_channel->bridge);
+               ast_debug(1, "Bridge %s: %p(%s) failed to join Bridge\n",
+                       bridge_channel->bridge->uniqueid,
+                       bridge_channel,
+                       ast_channel_name(bridge_channel->chan));
+               return -1;
+       }
        ast_channel_internal_bridge_set(bridge_channel->chan, bridge_channel->bridge);
        ast_channel_unlock(bridge_channel->chan);
 
        /* Add the jitterbuffer if the channel requires it */
        ast_jb_enable_for_channel(bridge_channel->chan);
 
-       /*
-        * Directly locking the bridge is safe here because nobody else
-        * knows about this bridge_channel yet.
-        */
-       ast_bridge_lock(bridge_channel->bridge);
-
        if (!bridge_channel->bridge->callid) {
                bridge_channel->bridge->callid = ast_read_threadstorage_callid();
        }
index 9e5dfdd..6585aec 100644 (file)
@@ -1142,6 +1142,8 @@ static int action_bridge(struct mansession *s, const struct message *m)
                return 0;
        }
 
+       ast_debug(1, "Performing Bridge action on %s and %s\n", channela, channelb);
+
        /* Start with chana */
        chana = ast_channel_get_by_name_prefix(channela, strlen(channela));
        if (!chana) {