pjsip_options.c: Fix race condition stopping periodic out of dialog OPTIONS request.
authorRichard Mudgett <rmudgett@digium.com>
Wed, 24 Sep 2014 18:35:47 +0000 (18:35 +0000)
committerRichard Mudgett <rmudgett@digium.com>
Wed, 24 Sep 2014 18:35:47 +0000 (18:35 +0000)
The crash on the issues is a result of an invalid transport configuration
change when asterisk is restarted.  The attempt to send the qualify
request fails and we cleaned up.  However, the callback is also called
which results in a double unref of the objects involved.

* Put a wrapper around pjsip_endpt_send_request() to detect when the
passed in callback is called because of an error so callers can know to
not cleanup.

* Made send_request_cb() able to handle repeated challenges (Up to 10).

* Fix periodic endpoint qualify OPTIONS sched deletion race by avoiding
it.  The sched entry will no longer self stop and must be externally
stopped.

* Added REF_DEBUG description tags to struct sched_data in
pjsip_options.c.

* Fix some off-nominal ref leaks in schedule_qualify(),
qualify_and_schedule().

* Reordered pjsip_options.c module start/stop code to cleanup better on
error.

ASTERISK-24295 #close
Reported by: Rogger Padilla

Review: https://reviewboard.asterisk.org/r/3954/
........

Merged revisions 423866 from http://svn.asterisk.org/svn/asterisk/branches/12
........

Merged revisions 423867 from http://svn.asterisk.org/svn/asterisk/branches/13

git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@423868 65c4cc65-6c06-0410-ace0-fbb531ad65f3

res/res_pjsip.c
res/res_pjsip/pjsip_options.c

index 1fdbbd6..87a1c82 100644 (file)
@@ -2431,24 +2431,35 @@ static pj_bool_t does_method_match(const pj_str_t *message_method, const char *s
        return pj_stristr(&method, message_method) ? PJ_TRUE : PJ_FALSE;
 }
 
+/*! Maximum number of challenges before assuming that we are in a loop */
+#define MAX_RX_CHALLENGES      10
+
 /*! \brief Structure to hold information about an outbound request */
 struct send_request_data {
-       struct ast_sip_endpoint *endpoint;              /*! The endpoint associated with this request */
-       void *token;                                    /*! Information to be provided to the callback upon receipt of a response */
-       void (*callback)(void *token, pjsip_event *e);  /*! The callback to be called upon receipt of a response */
+       /*! The endpoint associated with this request */
+       struct ast_sip_endpoint *endpoint;
+       /*! Information to be provided to the callback upon receipt of a response */
+       void *token;
+       /*! The callback to be called upon receipt of a response */
+       void (*callback)(void *token, pjsip_event *e);
+       /*! Number of challenges received. */
+       unsigned int challenge_count;
 };
 
 static void send_request_data_destroy(void *obj)
 {
        struct send_request_data *req_data = obj;
+
        ao2_cleanup(req_data->endpoint);
 }
 
 static struct send_request_data *send_request_data_alloc(struct ast_sip_endpoint *endpoint,
        void *token, void (*callback)(void *token, pjsip_event *e))
 {
-       struct send_request_data *req_data = ao2_alloc(sizeof(*req_data), send_request_data_destroy);
+       struct send_request_data *req_data;
 
+       req_data = ao2_alloc_options(sizeof(*req_data), send_request_data_destroy,
+               AO2_ALLOC_OPT_LOCK_NOLOCK);
        if (!req_data) {
                return NULL;
        }
@@ -2460,50 +2471,152 @@ static struct send_request_data *send_request_data_alloc(struct ast_sip_endpoint
        return req_data;
 }
 
-static void send_request_cb(void *token, pjsip_event *e)
+struct send_request_wrapper {
+       /*! Information to be provided to the callback upon receipt of a response */
+       void *token;
+       /*! The callback to be called upon receipt of a response */
+       void (*callback)(void *token, pjsip_event *e);
+       /*! Non-zero when the callback is called. */
+       unsigned int cb_called;
+};
+
+static void endpt_send_request_wrapper(void *token, pjsip_event *e)
 {
-       RAII_VAR(struct send_request_data *, req_data, token, ao2_cleanup);
-       pjsip_transaction *tsx = e->body.tsx_state.tsx;
-       pjsip_rx_data *challenge = e->body.tsx_state.src.rdata;
-       pjsip_tx_data *tdata;
-       struct ast_sip_supplement *supplement;
+       struct send_request_wrapper *req_wrapper = token;
 
-       AST_RWLIST_RDLOCK(&supplements);
-       AST_LIST_TRAVERSE(&supplements, supplement, next) {
-               if (supplement->incoming_response && does_method_match(&challenge->msg_info.cseq->method.name, supplement->method)) {
-                       supplement->incoming_response(req_data->endpoint, challenge);
+       req_wrapper->cb_called = 1;
+       if (req_wrapper->callback) {
+               req_wrapper->callback(req_wrapper->token, e);
+       }
+       ao2_ref(req_wrapper, -1);
+}
+
+static pj_status_t endpt_send_request(struct ast_sip_endpoint *endpoint,
+       pjsip_tx_data *tdata, pj_int32_t timeout, void *token, pjsip_endpt_send_callback cb)
+{
+       struct send_request_wrapper *req_wrapper;
+       pj_status_t ret_val;
+
+       /* Create wrapper to detect if the callback was actually called on an error. */
+       req_wrapper = ao2_alloc_options(sizeof(*req_wrapper), NULL,
+               AO2_ALLOC_OPT_LOCK_NOLOCK);
+       if (!req_wrapper) {
+               pjsip_tx_data_dec_ref(tdata);
+               return PJ_ENOMEM;
+       }
+       req_wrapper->token = token;
+       req_wrapper->callback = cb;
+
+       ao2_ref(req_wrapper, +1);
+       ret_val = pjsip_endpt_send_request(ast_sip_get_pjsip_endpoint(), tdata, timeout,
+               req_wrapper, endpt_send_request_wrapper);
+       if (ret_val != PJ_SUCCESS) {
+               char errmsg[PJ_ERR_MSG_SIZE];
+
+               /* Complain of failure to send the request. */
+               pj_strerror(ret_val, errmsg, sizeof(errmsg));
+               ast_log(LOG_ERROR, "Error %d '%s' sending %.*s request to endpoint %s\n",
+                       (int) ret_val, errmsg, (int) pj_strlen(&tdata->msg->line.req.method.name),
+                       pj_strbuf(&tdata->msg->line.req.method.name),
+                       endpoint ? ast_sorcery_object_get_id(endpoint) : "<unknown>");
+
+               /* Was the callback called? */
+               if (req_wrapper->cb_called) {
+                       /*
+                        * Yes so we cannot report any error.  The callback
+                        * has already freed any resources associated with
+                        * token.
+                        */
+                       ret_val = PJ_SUCCESS;
+               } else {
+                       /* No and it is not expected to ever be called. */
+                       ao2_ref(req_wrapper, -1);
                }
        }
-       AST_RWLIST_UNLOCK(&supplements);
+       ao2_ref(req_wrapper, -1);
+       return ret_val;
+}
 
-       if ((tsx->status_code == 401 || tsx->status_code == 407)
-               && req_data->endpoint
-               && !ast_sip_create_request_with_auth(&req_data->endpoint->outbound_auths, challenge, tsx, &tdata)
-               && pjsip_endpt_send_request(ast_sip_get_pjsip_endpoint(), tdata, -1, req_data->token, req_data->callback)
-                       == PJ_SUCCESS) {
-               return;
+static void send_request_cb(void *token, pjsip_event *e)
+{
+       struct send_request_data *req_data = token;
+       pjsip_transaction *tsx;
+       pjsip_rx_data *challenge;
+       pjsip_tx_data *tdata;
+       struct ast_sip_supplement *supplement;
+       struct ast_sip_endpoint *endpoint;
+       int res;
+
+       switch(e->body.tsx_state.type) {
+       case PJSIP_EVENT_TRANSPORT_ERROR:
+       case PJSIP_EVENT_TIMER:
+               break;
+       case PJSIP_EVENT_RX_MSG:
+               challenge = e->body.tsx_state.src.rdata;
+
+               /*
+                * Call any supplements that want to know about a response
+                * with any received data.
+                */
+               AST_RWLIST_RDLOCK(&supplements);
+               AST_LIST_TRAVERSE(&supplements, supplement, next) {
+                       if (supplement->incoming_response
+                               && does_method_match(&challenge->msg_info.cseq->method.name,
+                                       supplement->method)) {
+                               supplement->incoming_response(req_data->endpoint, challenge);
+                       }
+               }
+               AST_RWLIST_UNLOCK(&supplements);
+
+               /* Resend the request with a challenge response if we are challenged. */
+               tsx = e->body.tsx_state.tsx;
+               endpoint = ao2_bump(req_data->endpoint);
+               res = (tsx->status_code == 401 || tsx->status_code == 407)
+                       && endpoint
+                       && ++req_data->challenge_count < MAX_RX_CHALLENGES /* Not in a challenge loop */
+                       && !ast_sip_create_request_with_auth(&endpoint->outbound_auths,
+                               challenge, tsx, &tdata)
+                       && endpt_send_request(endpoint, tdata, -1, req_data, send_request_cb)
+                               == PJ_SUCCESS;
+               ao2_cleanup(endpoint);
+               if (res) {
+                       /*
+                        * Request with challenge response sent.
+                        * Passed our req_data ref to the new request.
+                        */
+                       return;
+               }
+               break;
+       default:
+               ast_log(LOG_ERROR, "Unexpected PJSIP event %d\n", e->body.tsx_state.type);
+               break;
        }
 
        if (req_data->callback) {
                req_data->callback(req_data->token, e);
        }
+       ao2_ref(req_data, -1);
 }
 
 static int send_out_of_dialog_request(pjsip_tx_data *tdata, struct ast_sip_endpoint *endpoint,
        void *token, void (*callback)(void *token, pjsip_event *e))
 {
        struct ast_sip_supplement *supplement;
-       struct send_request_data *req_data = send_request_data_alloc(endpoint, token, callback);
-       struct ast_sip_contact *contact = ast_sip_mod_data_get(tdata->mod_data, supplement_module.id, MOD_DATA_CONTACT);
+       struct send_request_data *req_data;
+       struct ast_sip_contact *contact;
 
+       req_data = send_request_data_alloc(endpoint, token, callback);
        if (!req_data) {
                pjsip_tx_data_dec_ref(tdata);
                return -1;
        }
 
+       contact = ast_sip_mod_data_get(tdata->mod_data, supplement_module.id, MOD_DATA_CONTACT);
+
        AST_RWLIST_RDLOCK(&supplements);
        AST_LIST_TRAVERSE(&supplements, supplement, next) {
-               if (supplement->outgoing_request && does_method_match(&tdata->msg->line.req.method.name, supplement->method)) {
+               if (supplement->outgoing_request
+                       && does_method_match(&tdata->msg->line.req.method.name, supplement->method)) {
                        supplement->outgoing_request(endpoint, contact, tdata);
                }
        }
@@ -2512,11 +2625,8 @@ static int send_out_of_dialog_request(pjsip_tx_data *tdata, struct ast_sip_endpo
        ast_sip_mod_data_set(tdata->pool, tdata->mod_data, supplement_module.id, MOD_DATA_CONTACT, NULL);
        ao2_cleanup(contact);
 
-       if (pjsip_endpt_send_request(ast_sip_get_pjsip_endpoint(), tdata, -1, req_data, send_request_cb) != PJ_SUCCESS) {
-               ast_log(LOG_ERROR, "Error attempting to send outbound %.*s request to endpoint %s\n",
-                               (int) pj_strlen(&tdata->msg->line.req.method.name),
-                               pj_strbuf(&tdata->msg->line.req.method.name),
-                               endpoint ? ast_sorcery_object_get_id(endpoint) : "<unknown>");
+       if (endpt_send_request(endpoint, tdata, -1, req_data, send_request_cb)
+               != PJ_SUCCESS) {
                ao2_cleanup(req_data);
                return -1;
        }
index 1c26251..453b21e 100644 (file)
@@ -113,7 +113,7 @@ static void update_contact_status(const struct ast_sip_contact *contact,
 
        /* if the contact is available calculate the rtt as
           the diff between the last start time and "now" */
-       update->rtt = update->status ?
+       update->rtt = update->status == AVAILABLE ?
                ast_tvdiff_us(ast_tvnow(), status->rtt_start) : 0;
 
        update->rtt_start = ast_tv(0, 0);
@@ -240,18 +240,21 @@ static struct ast_sip_endpoint *find_an_endpoint(struct ast_sip_contact *contact
 
 /*!
  * \internal
- * \brief Receive an response to the qualify contact request.
+ * \brief Receive a response to the qualify contact request.
  */
 static void qualify_contact_cb(void *token, pjsip_event *e)
 {
        struct ast_sip_contact *contact = token;
 
        switch(e->body.tsx_state.type) {
+       default:
+               ast_log(LOG_ERROR, "Unexpected PJSIP event %d\n", e->body.tsx_state.type);
+               /* Fall through */
        case PJSIP_EVENT_TRANSPORT_ERROR:
        case PJSIP_EVENT_TIMER:
                update_contact_status(contact, UNAVAILABLE);
                break;
-       default:
+       case PJSIP_EVENT_RX_MSG:
                update_contact_status(contact, AVAILABLE);
                break;
        }
@@ -308,6 +311,7 @@ static int qualify_contact(struct ast_sip_endpoint *endpoint, struct ast_sip_con
                != PJ_SUCCESS) {
                ast_log(LOG_ERROR, "Unable to send request to qualify contact %s\n",
                        contact->uri);
+               update_contact_status(contact, UNAVAILABLE);
                ao2_ref(contact, -1);
                return -1;
        }
@@ -354,10 +358,12 @@ static void sched_data_destructor(void *obj)
  */
 static struct sched_data *sched_data_create(struct ast_sip_contact *contact)
 {
-       struct sched_data *data = ao2_alloc(sizeof(*data), sched_data_destructor);
+       struct sched_data *data;
 
+       data = ao2_t_alloc(sizeof(*data), sched_data_destructor, contact->uri);
        if (!data) {
-               ast_log(LOG_ERROR, "Unable to create schedule qualify data\n");
+               ast_log(LOG_ERROR, "Unable to create schedule qualify data for contact %s\n",
+                       contact->uri);
                return NULL;
        }
 
@@ -392,11 +398,14 @@ static int qualify_contact_sched(const void *obj)
        ao2_ref(data->contact, +1);
        if (ast_sip_push_task(NULL, qualify_contact_task, data->contact)) {
                ao2_ref(data->contact, -1);
-               ao2_cleanup(data);
-               return 0;
        }
 
-       return data->contact->qualify_frequency * 1000;
+       /*
+        * Always reschedule rather than have a potential race cleaning
+        * up the data object ref between self deletion and an external
+        * deletion.
+        */
+       return 1;
 }
 
 /*!
@@ -405,24 +414,27 @@ static int qualify_contact_sched(const void *obj)
  */
 static void schedule_qualify(struct ast_sip_contact *contact)
 {
-       RAII_VAR(struct sched_data *, data, sched_data_create(contact), ao2_cleanup);
+       struct sched_data *data;
 
+       data = sched_data_create(contact);
        if (!data) {
                return;
        }
 
-       ao2_ref(data, +1);
-       if ((data->id = ast_sched_add_variable(
-                   sched, contact->qualify_frequency * 1000,
-                   qualify_contact_sched, data, 1)) < 0) {
+       ast_assert(contact->qualify_frequency != 0);
 
-               ao2_ref(data, -1);
+       ao2_t_ref(data, +1, "Ref for qualify_contact_sched() scheduler entry");
+       data->id = ast_sched_add_variable(sched, contact->qualify_frequency * 1000,
+               qualify_contact_sched, data, 0);
+       if (data->id < 0) {
+               ao2_t_ref(data, -1, "Cleanup failed scheduler add");
                ast_log(LOG_ERROR, "Unable to schedule qualify for contact %s\n",
                        contact->uri);
-               return;
+       } else if (!ao2_link(sched_qualifies, data)) {
+               AST_SCHED_DEL_UNREF(sched, data->id,
+                       ao2_t_ref(data, -1, "Cleanup scheduler for failed ao2_link"));
        }
-
-       ao2_link(sched_qualifies, data);
+       ao2_t_ref(data, -1, "Done setting up scheduler entry");
 }
 
 /*!
@@ -438,8 +450,9 @@ static void unschedule_qualify(struct ast_sip_contact *contact)
                return;
        }
 
-       AST_SCHED_DEL_UNREF(sched, data->id, ao2_cleanup(data));
-       ao2_ref(data, -1);
+       AST_SCHED_DEL_UNREF(sched, data->id,
+               ao2_t_ref(data, -1, "Delete scheduler entry ref"));
+       ao2_t_ref(data, -1, "Done with ao2_find ref");
 }
 
 /*!
@@ -452,7 +465,9 @@ static void qualify_and_schedule(struct ast_sip_contact *contact)
 
        if (contact->qualify_frequency) {
                ao2_ref(contact, +1);
-               ast_sip_push_task(NULL, qualify_contact_task, contact);
+               if (ast_sip_push_task(NULL, qualify_contact_task, contact)) {
+                       ao2_ref(contact, -1);
+               }
 
                schedule_qualify(contact);
        }
@@ -498,24 +513,45 @@ static const struct ast_sorcery_observer contact_observer = {
 
 static pj_bool_t options_start(void)
 {
-       if (!(sched = ast_sched_context_create()) ||
-           ast_sched_start_thread(sched)) {
+       sched = ast_sched_context_create();
+       if (!sched) {
+               return -1;
+       }
+       if (ast_sched_start_thread(sched)) {
+               ast_sched_context_destroy(sched);
+               sched = NULL;
+               return -1;
+       }
+
+       if (ast_sorcery_observer_add(ast_sip_get_sorcery(), "contact", &contact_observer)) {
+               ast_log(LOG_WARNING, "Unable to add contact observer\n");
+               ast_sched_context_destroy(sched);
+               sched = NULL;
                return -1;
        }
 
        return PJ_SUCCESS;
 }
 
+static int sched_qualifies_empty(void *obj, void *arg, int flags)
+{
+       ao2_t_ref(obj, -1, "Release ref held by destroyed scheduler context.");
+       return CMP_MATCH;
+}
+
 static pj_bool_t options_stop(void)
 {
        ast_sorcery_observer_remove(ast_sip_get_sorcery(), "contact", &contact_observer);
 
-       ao2_t_ref(sched_qualifies, -1, "Remove scheduled qualifies on module stop");
-
        if (sched) {
                ast_sched_context_destroy(sched);
+               sched = NULL;
        }
 
+       /* Empty the container of scheduling data refs. */
+       ao2_callback(sched_qualifies, OBJ_UNLINK | OBJ_NODATA | OBJ_MULTIPLE,
+               sched_qualifies_empty, NULL);
+
        return PJ_SUCCESS;
 }
 
@@ -1022,39 +1058,39 @@ static struct ast_sip_endpoint_formatter contact_status_formatter = {
 
 int ast_res_pjsip_init_options_handling(int reload)
 {
-       const pj_str_t STR_OPTIONS = { "OPTIONS", 7 };
+       static const pj_str_t STR_OPTIONS = { "OPTIONS", 7 };
 
        if (reload) {
                qualify_and_schedule_all();
                return 0;
        }
 
-       if (!(sched_qualifies = ao2_t_container_alloc(
-               QUALIFIED_BUCKETS, sched_qualifies_hash_fn, sched_qualifies_cmp_fn,
-               "Create container for scheduled qualifies"))) {
+       sched_qualifies = ao2_t_container_alloc(QUALIFIED_BUCKETS,
+               sched_qualifies_hash_fn, sched_qualifies_cmp_fn,
+               "Create container for scheduled qualifies");
+       if (!sched_qualifies) {
                return -1;
        }
 
        if (pjsip_endpt_register_module(ast_sip_get_pjsip_endpoint(), &options_module) != PJ_SUCCESS) {
-               options_stop();
+               ao2_cleanup(sched_qualifies);
+               sched_qualifies = NULL;
                return -1;
        }
 
-       if (pjsip_endpt_add_capability(ast_sip_get_pjsip_endpoint(), NULL, PJSIP_H_ALLOW, NULL, 1, &STR_OPTIONS) != PJ_SUCCESS) {
+       if (pjsip_endpt_add_capability(ast_sip_get_pjsip_endpoint(), NULL, PJSIP_H_ALLOW,
+               NULL, 1, &STR_OPTIONS) != PJ_SUCCESS) {
                pjsip_endpt_unregister_module(ast_sip_get_pjsip_endpoint(), &options_module);
-               return -1;
-       }
-
-       if (ast_sorcery_observer_add(ast_sip_get_sorcery(), "contact", &contact_observer)) {
-               ast_log(LOG_WARNING, "Unable to add contact observer\n");
+               ao2_cleanup(sched_qualifies);
+               sched_qualifies = NULL;
                return -1;
        }
 
        ast_sip_register_endpoint_formatter(&contact_status_formatter);
+       ast_manager_register2("PJSIPQualify", EVENT_FLAG_SYSTEM | EVENT_FLAG_REPORTING, ami_sip_qualify, NULL, NULL, NULL);
+       ast_cli_register_multiple(cli_options, ARRAY_LEN(cli_options));
 
        qualify_and_schedule_all();
-       ast_cli_register_multiple(cli_options, ARRAY_LEN(cli_options));
-       ast_manager_register2("PJSIPQualify", EVENT_FLAG_SYSTEM | EVENT_FLAG_REPORTING, ami_sip_qualify, NULL, NULL, NULL);
 
        return 0;
 }
@@ -1064,4 +1100,8 @@ void ast_res_pjsip_cleanup_options_handling(void)
        ast_cli_unregister_multiple(cli_options, ARRAY_LEN(cli_options));
        ast_manager_unregister("PJSIPQualify");
        ast_sip_unregister_endpoint_formatter(&contact_status_formatter);
+
+       pjsip_endpt_unregister_module(ast_sip_get_pjsip_endpoint(), &options_module);
+       ao2_cleanup(sched_qualifies);
+       sched_qualifies = NULL;
 }