Merge "contrib/sip_to_pjsip: handle setvar in conversion"
[asterisk/asterisk.git] / funcs / func_talkdetect.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2014, Digium, Inc.
5  *
6  * Matt Jordan <mjordan@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Function that raises events when talking is detected on a channel
22  *
23  * \author Matt Jordan <mjordan@digium.com>
24  *
25  * \ingroup functions
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30  ***/
31
32 #include "asterisk.h"
33
34 #include "asterisk/module.h"
35 #include "asterisk/channel.h"
36 #include "asterisk/pbx.h"
37 #include "asterisk/app.h"
38 #include "asterisk/dsp.h"
39 #include "asterisk/audiohook.h"
40 #include "asterisk/stasis.h"
41 #include "asterisk/stasis_channels.h"
42
43 /*** DOCUMENTATION
44         <function name="TALK_DETECT" language="en_US">
45                 <synopsis>
46                         Raises notifications when Asterisk detects silence or talking on a channel.
47                 </synopsis>
48                 <syntax>
49                         <parameter name="action" required="true">
50                                 <optionlist>
51                                         <option name="remove">
52                                                 <para>W/O. Remove talk detection from the channel.</para>
53                                         </option>
54                                         <option name="set">
55                                                 <para>W/O. Enable TALK_DETECT and/or configure talk detection
56                                                 parameters. Can be called multiple times to change parameters
57                                                 on a channel with talk detection already enabled.</para>
58                                                 <argument name="dsp_silence_threshold" required="false">
59                                                         <para>The time in milliseconds before which a user is considered silent.</para>
60                                                 </argument>
61                                                 <argument name="dsp_talking_threshold" required="false">
62                                                         <para>The time in milliseconds after which a user is considered talking.</para>
63                                                 </argument>
64                                         </option>
65                                 </optionlist>
66                         </parameter>
67                 </syntax>
68                 <description>
69                         <para>The TALK_DETECT function enables events on the channel
70                         it is applied to. These events can be emitted over AMI, ARI, and
71                         potentially other Asterisk modules that listen for the internal
72                         notification.</para>
73                         <para>The function has two parameters that can optionally be passed
74                         when <literal>set</literal> on a channel: <replaceable>dsp_talking_threshold</replaceable>
75                         and <replaceable>dsp_silence_threshold</replaceable>.</para>
76                         <para><replaceable>dsp_talking_threshold</replaceable> is the time in milliseconds of sound
77                         above what the dsp has established as base line silence for a user
78                         before a user is considered to be talking. By default, the value of
79                         <replaceable>silencethreshold</replaceable> from <filename>dsp.conf</filename>
80                         is used. If this value is set too tight events may be
81                         falsely triggered by variants in room noise.</para>
82                         <para>Valid values are 1 through 2^31.</para>
83                         <para><replaceable>dsp_silence_threshold</replaceable> is the time in milliseconds of sound
84                         falling within what the dsp has established as baseline silence before
85                         a user is considered to be silent. If this value is set too low events
86                         indicating the user has stopped talking may get falsely sent out when
87                         the user briefly pauses during mid sentence.</para>
88                         <para>The best way to approach this option is to set it slightly above
89                         the maximum amount of ms of silence a user may generate during
90                         natural speech.</para>
91                         <para>By default this value is 2500ms. Valid values are 1
92                         through 2^31.</para>
93                         <para>Example:</para>
94                         <para>same => n,Set(TALK_DETECT(set)=)     ; Enable talk detection</para>
95                         <para>same => n,Set(TALK_DETECT(set)=1200) ; Update existing talk detection's silence threshold to 1200 ms</para>
96                         <para>same => n,Set(TALK_DETECT(remove)=)  ; Remove talk detection</para>
97                         <para>same => n,Set(TALK_DETECT(set)=,128) ; Enable and set talk threshold to 128</para>
98                         <para>This function will set the following variables:</para>
99                         <note>
100                                 <para>The TALK_DETECT function uses an audiohook to inspect the
101                                 voice media frames on a channel. Other functions, such as JITTERBUFFER,
102                                 DENOISE, and AGC use a similar mechanism. Audiohooks are processed
103                                 in the order in which they are placed on the channel. As such,
104                                 it typically makes sense to place functions that modify the voice
105                                 media data prior to placing the TALK_DETECT function, as this will
106                                 yield better results.</para>
107                                 <para>Example:</para>
108                                 <para>same => n,Set(DENOISE(rx)=on)    ; Denoise received audio</para>
109                                 <para>same => n,Set(TALK_DETECT(set)=) ; Perform talk detection on the denoised received audio</para>
110                         </note>
111                 </description>
112         </function>
113  ***/
114
/*! \brief Silence threshold, in milliseconds, used when TALK_DETECT(set) is not given one */
#define DEFAULT_SILENCE_THRESHOLD 2500
116
117 /*! \brief Private data structure used with the function's datastore */
118 struct talk_detect_params {
119         /*! The audiohook for the function */
120         struct ast_audiohook audiohook;
121         /*! Our threshold above which we consider someone talking */
122         int dsp_talking_threshold;
123         /*! How long we'll wait before we decide someone is silent */
124         int dsp_silence_threshold;
125         /*! Whether or not the user is currently talking */
126         int talking;
127         /*! The time the current burst of talking started */
128         struct timeval talking_start;
129         /*! The DSP used to do the heavy lifting */
130         struct ast_dsp *dsp;
131 };
132
133 /*! \internal \brief Destroy the datastore */
134 static void datastore_destroy_cb(void *data) {
135         struct talk_detect_params *td_params = data;
136
137         ast_audiohook_destroy(&td_params->audiohook);
138
139         if (td_params->dsp) {
140                 ast_dsp_free(td_params->dsp);
141         }
142         ast_free(data);
143 }
144
145 /*! \brief The channel datastore the function uses to store state */
146 static const struct ast_datastore_info talk_detect_datastore = {
147         .type = "talk_detect",
148         .destroy = datastore_destroy_cb
149 };
150
151 /*! \internal \brief An audiohook modification callback
152  *
153  * This processes the read side of a channel's voice data to see if
154  * they are talking
155  *
156  * \note We don't actually modify the audio, so this function always
157  * returns a 'failure' indicating that it didn't modify the data
158  */
159 static int talk_detect_audiohook_cb(struct ast_audiohook *audiohook, struct ast_channel *chan, struct ast_frame *frame, enum ast_audiohook_direction direction)
160 {
161         int total_silence;
162         int update_talking = 0;
163         struct ast_datastore *datastore;
164         struct talk_detect_params *td_params;
165         struct stasis_message *message;
166
167         if (audiohook->status == AST_AUDIOHOOK_STATUS_DONE) {
168                 return 1;
169         }
170
171         if (direction != AST_AUDIOHOOK_DIRECTION_READ) {
172                 return 1;
173         }
174
175         if (frame->frametype != AST_FRAME_VOICE) {
176                 return 1;
177         }
178
179         if (!(datastore = ast_channel_datastore_find(chan, &talk_detect_datastore, NULL))) {
180                 return 1;
181         }
182         td_params = datastore->data;
183
184         ast_dsp_silence(td_params->dsp, frame, &total_silence);
185
186         if (total_silence < td_params->dsp_silence_threshold) {
187                 if (!td_params->talking) {
188                         update_talking = 1;
189                         td_params->talking_start = ast_tvnow();
190                 }
191                 td_params->talking = 1;
192         } else {
193                 if (td_params->talking) {
194                         update_talking = 1;
195                 }
196                 td_params->talking = 0;
197         }
198
199         if (update_talking) {
200                 struct ast_json *blob = NULL;
201
202                 if (!td_params->talking) {
203                         int64_t diff_ms = ast_tvdiff_ms(ast_tvnow(), td_params->talking_start);
204                         diff_ms -= td_params->dsp_silence_threshold;
205
206                         blob = ast_json_pack("{s: i}", "duration", diff_ms);
207                         if (!blob) {
208                                 return 1;
209                         }
210                 }
211
212                 ast_verb(4, "%s is now %s\n", ast_channel_name(chan),
213                             td_params->talking ? "talking" : "silent");
214                 message = ast_channel_blob_create_from_cache(ast_channel_uniqueid(chan),
215                                 td_params->talking ? ast_channel_talking_start() : ast_channel_talking_stop(),
216                                 blob);
217                 if (message) {
218                         stasis_publish(ast_channel_topic(chan), message);
219                         ao2_ref(message, -1);
220                 }
221
222                 ast_json_unref(blob);
223         }
224
225         return 1;
226 }
227
228 /*! \internal \brief Disable talk detection on the channel */
229 static int remove_talk_detect(struct ast_channel *chan)
230 {
231         struct ast_datastore *datastore = NULL;
232         struct talk_detect_params *td_params;
233         SCOPED_CHANNELLOCK(chan_lock, chan);
234
235         datastore = ast_channel_datastore_find(chan, &talk_detect_datastore, NULL);
236         if (!datastore) {
237                 ast_log(AST_LOG_WARNING, "Cannot remove TALK_DETECT from %s: TALK_DETECT not currently enabled\n",
238                         ast_channel_name(chan));
239                 return -1;
240         }
241         td_params = datastore->data;
242
243         if (ast_audiohook_remove(chan, &td_params->audiohook)) {
244                 ast_log(AST_LOG_WARNING, "Failed to remove TALK_DETECT audiohook from channel %s\n",
245                         ast_channel_name(chan));
246                 return -1;
247         }
248
249         if (ast_channel_datastore_remove(chan, datastore)) {
250                 ast_log(AST_LOG_WARNING, "Failed to remove TALK_DETECT datastore from channel %s\n",
251                         ast_channel_name(chan));
252                 return -1;
253         }
254         ast_datastore_free(datastore);
255
256         return 0;
257 }
258
259 /*! \internal \brief Enable talk detection on the channel */
260 static int set_talk_detect(struct ast_channel *chan, int dsp_silence_threshold, int dsp_talking_threshold)
261 {
262         struct ast_datastore *datastore = NULL;
263         struct talk_detect_params *td_params;
264         SCOPED_CHANNELLOCK(chan_lock, chan);
265
266         datastore = ast_channel_datastore_find(chan, &talk_detect_datastore, NULL);
267         if (!datastore) {
268                 datastore = ast_datastore_alloc(&talk_detect_datastore, NULL);
269                 if (!datastore) {
270                         return -1;
271                 }
272
273                 td_params = ast_calloc(1, sizeof(*td_params));
274                 if (!td_params) {
275                         ast_datastore_free(datastore);
276                         return -1;
277                 }
278
279                 ast_audiohook_init(&td_params->audiohook,
280                                    AST_AUDIOHOOK_TYPE_MANIPULATE,
281                                    "TALK_DETECT",
282                                    AST_AUDIOHOOK_MANIPULATE_ALL_RATES);
283                 td_params->audiohook.manipulate_callback = talk_detect_audiohook_cb;
284                 ast_set_flag(&td_params->audiohook, AST_AUDIOHOOK_TRIGGER_READ);
285
286                 td_params->dsp = ast_dsp_new_with_rate(ast_format_get_sample_rate(ast_channel_rawreadformat(chan)));
287                 if (!td_params->dsp) {
288                         ast_datastore_free(datastore);
289                         ast_free(td_params);
290                         return -1;
291                 }
292                 datastore->data = td_params;
293
294                 ast_channel_datastore_add(chan, datastore);
295                 ast_audiohook_attach(chan, &td_params->audiohook);
296         } else {
297                 /* Talk detection already enabled; update existing settings */
298                 td_params = datastore->data;
299         }
300
301         td_params->dsp_talking_threshold = dsp_talking_threshold;
302         td_params->dsp_silence_threshold = dsp_silence_threshold;
303
304         ast_dsp_set_threshold(td_params->dsp, td_params->dsp_talking_threshold);
305
306         return 0;
307 }
308
309 /*! \internal \brief TALK_DETECT write function callback */
310 static int talk_detect_fn_write(struct ast_channel *chan, const char *function, char *data, const char *value)
311 {
312         int res;
313
314         if (!chan) {
315                 return -1;
316         }
317
318         if (ast_strlen_zero(data)) {
319                 ast_log(AST_LOG_WARNING, "TALK_DETECT requires an argument\n");
320                 return -1;
321         }
322
323         if (!strcasecmp(data, "set")) {
324                 int dsp_silence_threshold = DEFAULT_SILENCE_THRESHOLD;
325                 int dsp_talking_threshold = ast_dsp_get_threshold_from_settings(THRESHOLD_SILENCE);
326
327                 if (!ast_strlen_zero(value)) {
328                         char *parse = ast_strdupa(value);
329
330                         AST_DECLARE_APP_ARGS(args,
331                                 AST_APP_ARG(silence_threshold);
332                                 AST_APP_ARG(talking_threshold);
333                         );
334
335                         AST_STANDARD_APP_ARGS(args, parse);
336
337                         if (!ast_strlen_zero(args.silence_threshold)) {
338                                 if (sscanf(args.silence_threshold, "%30d", &dsp_silence_threshold) != 1) {
339                                         ast_log(AST_LOG_WARNING, "Failed to parse %s for dsp_silence_threshold\n",
340                                                 args.silence_threshold);
341                                         return -1;
342                                 }
343
344                                 if (dsp_silence_threshold < 1) {
345                                         ast_log(AST_LOG_WARNING, "Invalid value %d for dsp_silence_threshold\n",
346                                                 dsp_silence_threshold);
347                                         return -1;
348                                 }
349                         }
350
351                         if (!ast_strlen_zero(args.talking_threshold)) {
352                                 if (sscanf(args.talking_threshold, "%30d", &dsp_talking_threshold) != 1) {
353                                         ast_log(AST_LOG_WARNING, "Failed to parse %s for dsp_talking_threshold\n",
354                                                 args.talking_threshold);
355                                         return -1;
356                                 }
357
358                                 if (dsp_talking_threshold < 1) {
359                                         ast_log(AST_LOG_WARNING, "Invalid value %d for dsp_talking_threshold\n",
360                                                 dsp_silence_threshold);
361                                         return -1;
362                                 }
363                         }
364                 }
365
366                 res = set_talk_detect(chan, dsp_silence_threshold, dsp_talking_threshold);
367         } else if (!strcasecmp(data, "remove")) {
368                 res = remove_talk_detect(chan);
369         } else {
370                 ast_log(AST_LOG_WARNING, "TALK_DETECT: unknown option %s\n", data);
371                 res = -1;
372         }
373
374         return res;
375 }
376
377 /*! \brief Definition of the TALK_DETECT function */
378 static struct ast_custom_function talk_detect_function = {
379         .name = "TALK_DETECT",
380         .write = talk_detect_fn_write,
381 };
382
383 /*! \internal \brief Unload the module */
384 static int unload_module(void)
385 {
386         int res = 0;
387
388         res |= ast_custom_function_unregister(&talk_detect_function);
389
390         return res;
391 }
392
393 /*! \internal \brief Load the module */
394 static int load_module(void)
395 {
396         int res = 0;
397
398         res |= ast_custom_function_register(&talk_detect_function);
399
400         return res ? AST_MODULE_LOAD_DECLINE : AST_MODULE_LOAD_SUCCESS;
401 }
402
403 AST_MODULE_INFO_STANDARD(ASTERISK_GPL_KEY, "Talk detection dialplan function");