stasis/control: Fix possible deadlock with swap channel
authorGeorge Joseph <gjoseph@digium.com>
Fri, 1 Sep 2017 10:17:02 +0000 (04:17 -0600)
committerRichard Mudgett <rmudgett@digium.com>
Wed, 6 Sep 2017 18:00:49 +0000 (13:00 -0500)
If an error occurs during a bridge impart it's possible that
the "bridge_after" callback might try to run before
control_swap_channel_in_bridge has been signalled to continue.
Since control_swap_channel_in_bridge is holding the control lock
and the callback needs it, a deadlock will occur.

* control_swap_channel_in_bridge now only holds the control
  lock while it's actually modifying the control structure and
  releases it while the bridge impart is running.
* bridge_after_cb is now tolerant of impart failures.

Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3

include/asterisk/bridge_after.h
main/bridge.c
main/bridge_after.c
res/stasis/control.c

index 53f30b9..0451685 100644 (file)
@@ -45,6 +45,8 @@ enum ast_bridge_after_cb_reason {
        AST_BRIDGE_AFTER_CB_REASON_DEPART,
        /*! Was explicitly removed by external code. */
        AST_BRIDGE_AFTER_CB_REASON_REMOVED,
+       /*! The channel failed to enter the bridge. */
+       AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED,
 };
 
 /*!
index b732d5f..ab12ecf 100644 (file)
@@ -1758,12 +1758,13 @@ join_exit:;
 static void *bridge_channel_depart_thread(void *data)
 {
        struct ast_bridge_channel *bridge_channel = data;
+       int res = 0;
 
        if (bridge_channel->callid) {
                ast_callid_threadassoc_add(bridge_channel->callid);
        }
 
-       bridge_channel_internal_join(bridge_channel);
+       res = bridge_channel_internal_join(bridge_channel);
 
        /*
         * cleanup
@@ -1775,7 +1776,8 @@ static void *bridge_channel_depart_thread(void *data)
        ast_bridge_features_destroy(bridge_channel->features);
        bridge_channel->features = NULL;
 
-       ast_bridge_discard_after_callback(bridge_channel->chan, AST_BRIDGE_AFTER_CB_REASON_DEPART);
+       ast_bridge_discard_after_callback(bridge_channel->chan,
+               res ? AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED : AST_BRIDGE_AFTER_CB_REASON_DEPART);
        /* If join failed there will be impart threads waiting. */
        bridge_channel_impart_signal(bridge_channel->chan);
        ast_bridge_discard_after_goto(bridge_channel->chan);
index d4aec75..69c8f98 100644 (file)
@@ -293,23 +293,23 @@ int ast_bridge_set_after_callback(struct ast_channel *chan, ast_bridge_after_cb
        return 0;
 }
 
-const char *reason_strings[] = {
-       [AST_BRIDGE_AFTER_CB_REASON_DESTROY] = "Channel destroyed (hungup)",
-       [AST_BRIDGE_AFTER_CB_REASON_REPLACED] = "Callback was replaced",
-       [AST_BRIDGE_AFTER_CB_REASON_MASQUERADE] = "Channel masqueraded",
-       [AST_BRIDGE_AFTER_CB_REASON_DEPART] = "Channel was departed from bridge",
-       [AST_BRIDGE_AFTER_CB_REASON_REMOVED] = "Callback was removed",
-};
-
 const char *ast_bridge_after_cb_reason_string(enum ast_bridge_after_cb_reason reason)
 {
-       if (reason < AST_BRIDGE_AFTER_CB_REASON_DESTROY
-               || AST_BRIDGE_AFTER_CB_REASON_REMOVED < reason
-               || !reason_strings[reason]) {
-               return "Unknown";
-       }
-
-       return reason_strings[reason];
+       switch (reason) {
+       case AST_BRIDGE_AFTER_CB_REASON_DESTROY:
+               return "Channel destroyed (hungup)";
+       case AST_BRIDGE_AFTER_CB_REASON_REPLACED:
+               return "Callback was replaced";
+       case AST_BRIDGE_AFTER_CB_REASON_MASQUERADE:
+               return "Channel masqueraded";
+       case AST_BRIDGE_AFTER_CB_REASON_DEPART:
+               return "Channel was departed from bridge";
+       case AST_BRIDGE_AFTER_CB_REASON_REMOVED:
+               return "Callback was removed";
+       case AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED:
+               return "Channel failed joining the bridge";
+       }
+       return "Unknown";
 }
 
 struct after_bridge_goto_ds {
index 503f111..085ca7e 100644 (file)
@@ -983,14 +983,21 @@ static int bridge_channel_depart(struct stasis_app_control *control,
        return 0;
 }
 
-static void bridge_after_cb(struct ast_channel *chan, void *data)
+static void internal_bridge_after_cb(struct ast_channel *chan, void *data,
+       enum ast_bridge_after_cb_reason reason)
 {
        struct stasis_app_control *control = data;
        SCOPED_AO2LOCK(lock, control);
        struct ast_bridge_channel *bridge_channel;
 
-       ast_debug(3, "%s, %s: Channel leaving bridge\n",
-               ast_channel_uniqueid(chan), control->bridge->uniqueid);
+       ast_debug(3, "%s, %s: %s\n",
+               ast_channel_uniqueid(chan), control->bridge ? control->bridge->uniqueid : "unknown",
+                       ast_bridge_after_cb_reason_string(reason));
+
+       if (reason == AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED) {
+               /* The impart actually failed so control->bridge isn't valid. */
+               control->bridge = NULL;
+       }
 
        ast_assert(chan == control->channel);
 
@@ -998,18 +1005,21 @@ static void bridge_after_cb(struct ast_channel *chan, void *data)
        ast_channel_pbx_set(control->channel, control->pbx);
        control->pbx = NULL;
 
-       app_unsubscribe_bridge(control->app, control->bridge);
+       if (control->bridge) {
+               app_unsubscribe_bridge(control->app, control->bridge);
 
-       /* No longer in the bridge */
-       control->bridge = NULL;
+               /* No longer in the bridge */
+               control->bridge = NULL;
 
-       /* Get the bridge channel so we don't depart from the wrong bridge */
-       ast_channel_lock(chan);
-       bridge_channel = ast_channel_get_bridge_channel(chan);
-       ast_channel_unlock(chan);
+               /* Get the bridge channel so we don't depart from the wrong bridge */
+               ast_channel_lock(chan);
+               bridge_channel = ast_channel_get_bridge_channel(chan);
+               ast_channel_unlock(chan);
+
+               /* Depart this channel from the bridge using the command queue if possible */
+               stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);
+       }
 
-       /* Depart this channel from the bridge using the command queue if possible */
-       stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);
        if (stasis_app_channel_is_stasis_end_published(chan)) {
                /* The channel has had a StasisEnd published on it, but until now had remained in
                 * the bridging system. This means that the channel moved from a Stasis bridge to a
@@ -1027,12 +1037,19 @@ static void bridge_after_cb(struct ast_channel *chan, void *data)
        }
 }
 
+static void bridge_after_cb(struct ast_channel *chan, void *data)
+{
+       struct stasis_app_control *control = data;
+
+       internal_bridge_after_cb(control->channel, data, AST_BRIDGE_AFTER_CB_REASON_DEPART);
+}
+
 static void bridge_after_cb_failed(enum ast_bridge_after_cb_reason reason,
        void *data)
 {
        struct stasis_app_control *control = data;
 
-       bridge_after_cb(control->channel, data);
+       internal_bridge_after_cb(control->channel, data, reason);
 
        ast_debug(3, "  reason: %s\n",
                ast_bridge_after_cb_reason_string(reason));
@@ -1171,46 +1188,57 @@ int control_swap_channel_in_bridge(struct stasis_app_control *control, struct as
                return -1;
        }
 
-       {
-               /* pbx and bridge are modified by the bridging impart thread.
-                * It shouldn't happen concurrently, but we still need to lock
-                * for the memory fence.
-                */
-               SCOPED_AO2LOCK(lock, control);
+       ao2_lock(control);
 
-               /* Ensure the controlling application is subscribed early enough
-                * to receive the ChannelEnteredBridge message. This works in concert
-                * with the subscription handled in the Stasis application execution
-                * loop */
-               app_subscribe_bridge(control->app, bridge);
-
-               /* Save off the channel's PBX */
-               ast_assert(control->pbx == NULL);
-               if (!control->pbx) {
-                       control->pbx = ast_channel_pbx(chan);
-                       ast_channel_pbx_set(chan, NULL);
-               }
+       /* Ensure the controlling application is subscribed early enough
+        * to receive the ChannelEnteredBridge message. This works in concert
+        * with the subscription handled in the Stasis application execution
+        * loop */
+       app_subscribe_bridge(control->app, bridge);
 
-               res = ast_bridge_impart(bridge,
-                       chan,
-                       swap,
-                       NULL, /* features */
-                       AST_BRIDGE_IMPART_CHAN_DEPARTABLE);
-               if (res != 0) {
-                       ast_log(LOG_ERROR, "Error adding channel to bridge\n");
-                       ast_channel_pbx_set(chan, control->pbx);
-                       control->pbx = NULL;
-                       return -1;
-               }
+       /* Save off the channel's PBX */
+       ast_assert(control->pbx == NULL);
+       if (!control->pbx) {
+               control->pbx = ast_channel_pbx(chan);
+               ast_channel_pbx_set(chan, NULL);
+       }
 
-               ast_assert(stasis_app_get_bridge(control) == NULL);
-               control->bridge = bridge;
+       ast_assert(stasis_app_get_bridge(control) == NULL);
+       /* We need to set control->bridge here since bridge_after_cb may be run
+        * before ast_bridge_impart returns.  bridge_after_cb gets a reason
+        * code so it can tell if the bridge is actually valid or not.
+        */
+       control->bridge = bridge;
 
+       /* We can't be holding the control lock while impart is running
+        * or we could create a deadlock with bridge_after_cb which also
+        * tries to lock control.
+        */
+       ao2_unlock(control);
+       res = ast_bridge_impart(bridge,
+               chan,
+               swap,
+               NULL, /* features */
+               AST_BRIDGE_IMPART_CHAN_DEPARTABLE);
+       if (res != 0) {
+               /* ast_bridge_impart failed before it could spawn the depart
+                * thread.  The callbacks aren't called in this case.
+                * The impart could still fail even if ast_bridge_impart returned
+                * ok but that's handled by bridge_after_cb.
+                */
+               ast_log(LOG_ERROR, "Error adding channel to bridge\n");
+               ao2_lock(control);
+               ast_channel_pbx_set(chan, control->pbx);
+               control->pbx = NULL;
+               control->bridge = NULL;
+               ao2_unlock(control);
+       } else {
                ast_channel_lock(chan);
                set_interval_hook(chan);
                ast_channel_unlock(chan);
        }
-       return 0;
+
+       return res;
 }
 
 int control_add_channel_to_bridge(struct stasis_app_control *control, struct ast_channel *chan, void *data)