res_pjsip: Failover when server is not available
author Kevin Harwell <kharwell@digium.com>
Tue, 30 Jun 2015 20:19:31 +0000 (15:19 -0500)
committer Kevin Harwell <kharwell@digium.com>
Mon, 6 Jul 2015 15:49:08 +0000 (10:49 -0500)
Previously Asterisk did not properly fail over to the next resolved DNS
address when an endpoint could not be reached. With this patch, and while
using res_pjsip, SIP requests (both in and out of dialog) now attempt to use
the next address in the list of resolved addresses until a proper response
is received or no more addresses are left.

ASTERISK-25076 #close
Reported by: Joshua Colp

Change-Id: Ief14f4ebd82474881f72f4538f4577f30af2a764

include/asterisk/res_pjsip.h
res/res_pjsip.c
res/res_pjsip_session.c

index 9475d6d..f199b8f 100644 (file)
@@ -2044,4 +2044,13 @@ unsigned int ast_sip_get_max_initial_qualify_time(void);
 const char *ast_sip_get_contact_status_label(const enum ast_sip_contact_status_type status);
 const char *ast_sip_get_contact_short_status_label(const enum ast_sip_contact_status_type status);
 
+/*!
+ * \brief Set a request to use the next value in the list of resolved addresses.
+ *
+ * \param tdata the tx data from the original request
+ * \retval 0 No more addresses to try
+ * \retval 1 The request was successfully re-initialized
+ *
+ * \note This only prepares tdata for another attempt; on a return of 1 the
+ * caller is still responsible for sending the request again.
+ */
+int ast_sip_failover_request(pjsip_tx_data *tdata);
+
 #endif /* _RES_PJSIP_H */
index bb5bc03..6d7e4f7 100644 (file)
@@ -3135,19 +3135,88 @@ static pj_status_t endpt_send_request(struct ast_sip_endpoint *endpoint,
        return ret_val;
 }
 
+/*
+ * Point a previously sent request at the next resolved destination address
+ * so that the caller can send it again.
+ *
+ * tdata is the tx data of the original request. Returns 1 when tdata was
+ * moved on to another address and must be resent by the caller, or 0 when
+ * there is nothing left to try (no tdata, no resolved addresses, or the
+ * current address is already the last one).
+ */
+int ast_sip_failover_request(pjsip_tx_data *tdata)
+{
+       pjsip_via_hdr *via;
+
+       /*
+        * The address count has to be checked for zero first: the count is
+        * unsigned in PJSIP, so "count - 1" would wrap around and the
+        * last-address comparison would never match for an empty list.
+        */
+       if (!tdata || !tdata->dest_info.addr.count
+               || tdata->dest_info.cur_addr >= tdata->dest_info.addr.count - 1) {
+               /* No more addresses to try */
+               return 0;
+       }
+
+       /* Try next address */
+       ++tdata->dest_info.cur_addr;
+
+       /*
+        * Clear the branch parameter of the top Via header, presumably so the
+        * retry does not reuse the branch of the failed transaction (confirm
+        * against PJSIP's transaction creation). The lookup can return NULL,
+        * so only touch the header when one was actually found.
+        */
+       via = (pjsip_via_hdr*)pjsip_msg_find_hdr(tdata->msg, PJSIP_H_VIA, NULL);
+       if (via) {
+               via->branch_param.slen = 0;
+       }
+
+       /* The message was modified, so any previously printed buffer is stale. */
+       pjsip_tx_data_invalidate_msg(tdata);
+
+       return 1;
+}
+
+static void send_request_cb(void *token, pjsip_event *e);
+
+/*
+ * Decide, from the status code of the transaction carried by the event,
+ * whether the out-of-dialog request should be sent again, and send it if so.
+ *
+ * A 401/407 triggers a resend with authentication (bounded by
+ * MAX_RX_CHALLENGES); a 408/503 triggers a resend of the same tx data to the
+ * next resolved address. Any other status code does nothing.
+ *
+ * Returns non-zero when a new request was handed to endpt_send_request()
+ * successfully, and 0 otherwise (including when req_data has no endpoint).
+ */
+static int check_request_status(struct send_request_data *req_data, pjsip_event *e)
+{
+       struct ast_sip_endpoint *ep = ao2_bump(req_data->endpoint);
+       pjsip_transaction *trans;
+       pjsip_tx_data *next_tdata = NULL;
+       int resend = 0;
+       int code;
+
+       if (!ep) {
+               return 0;
+       }
+
+       trans = e->body.tsx_state.tsx;
+       code = trans->status_code;
+
+       if (code == 401 || code == 407) {
+               /* Challenged: answer it, unless we appear to be in a challenge loop. */
+               if (++req_data->challenge_count < MAX_RX_CHALLENGES) {
+                       resend = !ast_sip_create_request_with_auth(&ep->outbound_auths,
+                               e->body.tsx_state.src.rdata, trans->last_tx, &next_tdata);
+               }
+       } else if (code == 408 || code == 503) {
+               resend = ast_sip_failover_request(trans->last_tx);
+               if (resend) {
+                       next_tdata = trans->last_tx;
+                       /*
+                        * The same tx data moves over to a new transaction, so take
+                        * a reference to keep it alive once the old one goes away.
+                        */
+                       pjsip_tx_data_add_ref(next_tdata);
+               }
+       }
+
+       if (resend) {
+               resend = endpt_send_request(ep, next_tdata, -1,
+                                           req_data, send_request_cb) == PJ_SUCCESS;
+       }
+
+       ao2_ref(ep, -1);
+       return resend;
+}
+
 static void send_request_cb(void *token, pjsip_event *e)
 {
        struct send_request_data *req_data = token;
-       pjsip_transaction *tsx;
        pjsip_rx_data *challenge;
-       pjsip_tx_data *tdata;
        struct ast_sip_supplement *supplement;
-       struct ast_sip_endpoint *endpoint;
-       int res;
 
        switch(e->body.tsx_state.type) {
        case PJSIP_EVENT_TRANSPORT_ERROR:
        case PJSIP_EVENT_TIMER:
+               /*
+                * Check the request status on transport error or timeout. A transport
+                * error can occur when a TCP socket closes and that can be the result
+                * of a 503. Also we may need to failover on a timeout (408).
+                */
+               if (check_request_status(req_data, e)) {
+                       return;
+               }
                break;
        case PJSIP_EVENT_RX_MSG:
                challenge = e->body.tsx_state.src.rdata;
@@ -3166,20 +3235,9 @@ static void send_request_cb(void *token, pjsip_event *e)
                }
                AST_RWLIST_UNLOCK(&supplements);
 
-               /* Resend the request with a challenge response if we are challenged. */
-               tsx = e->body.tsx_state.tsx;
-               endpoint = ao2_bump(req_data->endpoint);
-               res = (tsx->status_code == 401 || tsx->status_code == 407)
-                       && endpoint
-                       && ++req_data->challenge_count < MAX_RX_CHALLENGES /* Not in a challenge loop */
-                       && !ast_sip_create_request_with_auth(&endpoint->outbound_auths,
-                               challenge, tsx->last_tx, &tdata)
-                       && endpt_send_request(endpoint, tdata, -1, req_data, send_request_cb)
-                               == PJ_SUCCESS;
-               ao2_cleanup(endpoint);
-               if (res) {
+               if (check_request_status(req_data, e)) {
                        /*
-                        * Request with challenge response sent.
+                        * Request with challenge response or failover sent.
                         * Passed our req_data ref to the new request.
                         */
                        return;
index bbd74ee..c729594 100644 (file)
@@ -2267,6 +2267,29 @@ static int session_end(struct ast_sip_session *session)
        return 0;
 }
 
+/*
+ * Retry the request of an INVITE session against the next resolved address
+ * when the transaction in the event ended with a 408 or 503.
+ *
+ * Returns 1 when the invite session was restarted and the request was passed
+ * to ast_sip_session_send_request() again, 0 when no failover was attempted
+ * (different status code, or no further address available).
+ */
+static int check_request_status(pjsip_inv_session *inv, pjsip_event *e)
+{
+       struct ast_sip_session *session = inv->mod_data[session_module.id];
+       pjsip_transaction *trans = e->body.tsx_state.tsx;
+       pjsip_tx_data *retry_tdata;
+       int code = trans->status_code;
+
+       /* Only 408/503 qualify, and only while another address is left to try. */
+       if ((code != 503 && code != 408)
+               || !ast_sip_failover_request(trans->last_tx)) {
+               return 0;
+       }
+
+       pjsip_inv_uac_restart(inv, PJ_FALSE);
+
+       /*
+        * The tx data is reused on a new transaction; hold an extra reference
+        * so it does not disappear together with the old transaction.
+        */
+       retry_tdata = trans->last_tx;
+       pjsip_tx_data_add_ref(retry_tdata);
+       ast_sip_session_send_request(session, retry_tdata);
+
+       return 1;
+}
+
 static void session_inv_on_state_changed(pjsip_inv_session *inv, pjsip_event *e)
 {
        struct ast_sip_session *session = inv->mod_data[session_module.id];
@@ -2299,11 +2322,20 @@ static void session_inv_on_state_changed(pjsip_inv_session *inv, pjsip_event *e)
                        handle_outgoing(session, e->body.tsx_state.src.tdata);
                        break;
                case PJSIP_EVENT_RX_MSG:
-                       handle_incoming(session, e->body.tsx_state.src.rdata, type,
-                                       AST_SIP_SESSION_BEFORE_MEDIA);
+                       if (!check_request_status(inv, e)) {
+                               handle_incoming(session, e->body.tsx_state.src.rdata, type,
+                                               AST_SIP_SESSION_BEFORE_MEDIA);
+                       }
                        break;
                case PJSIP_EVENT_TRANSPORT_ERROR:
                case PJSIP_EVENT_TIMER:
+                       /*
+                        * Check the request status on transport error or timeout. A transport
+                        * error can occur when a TCP socket closes and that can be the result
+                        * of a 503. Also we may need to failover on a timeout (408).
+                        */
+                       check_request_status(inv, e);
+                       break;
                case PJSIP_EVENT_USER:
                case PJSIP_EVENT_UNKNOWN:
                case PJSIP_EVENT_TSX_STATE: