media formats: re-architect handling of media for performance improvements
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30         <depend>res_speech</depend>
31  ***/
32
33 #include "asterisk.h"
34
35 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
36
37 #include "asterisk/file.h"
38 #include "asterisk/channel.h"
39 #include "asterisk/pbx.h"
40 #include "asterisk/module.h"
41 #include "asterisk/lock.h"
42 #include "asterisk/app.h"
43 #include "asterisk/speech.h"
44
45 /*** DOCUMENTATION
46         <application name="SpeechCreate" language="en_US">
47                 <synopsis>
48                         Create a Speech Structure.
49                 </synopsis>
50                 <syntax>
51                         <parameter name="engine_name" required="true" />
52                 </syntax>
53                 <description>
54                         <para>This application creates information to be used by all the other applications.
55                         It must be called before doing any speech recognition activities such as activating a grammar.
56                         It takes the engine name to use as the argument; if not specified, the default engine will be used.</para>
57                         <para>Sets the ERROR channel variable to 1 if the engine cannot be used.</para>
58                 </description>
59         </application>
60         <application name="SpeechActivateGrammar" language="en_US">
61                 <synopsis>
62                         Activate a grammar.
63                 </synopsis>
64                 <syntax>
65                         <parameter name="grammar_name" required="true" />
66                 </syntax>
67                 <description>
68                         <para>This activates the specified grammar to be recognized by the engine.
69                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
70                         in the dialplan. The grammar name is the only argument to this application.</para>
71                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
72                 </description>
73         </application>
74         <application name="SpeechStart" language="en_US">
75                 <synopsis>
76                         Start recognizing voice in the audio stream.
77                 </synopsis>
78                 <syntax />
79                 <description>
80                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
81                         fed to it.</para>
82                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
83                 </description>
84         </application>
85         <application name="SpeechBackground" language="en_US">
86                 <synopsis>
87                         Play a sound file and wait for speech to be recognized.
88                 </synopsis>
89                 <syntax>
90                         <parameter name="sound_file" required="true" />
91                         <parameter name="timeout">
92                                 <para>Timeout integer in seconds. Note the timeout will only start
93                                 once the sound file has stopped playing.</para>
94                         </parameter>
95                         <parameter name="options">
96                                 <optionlist>
97                                         <option name="n">
98                                                 <para>Don't answer the channel if it has not already been answered.</para>
99                                         </option>
100                                 </optionlist>
101                         </parameter>
102                 </syntax>
103                 <description>
104                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
105                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
106                         the speech recognition engine is working. Once results are available the application returns and results
107                         (score and text) are available using dialplan functions.</para>
108                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)}
109                         and ${SPEECH_SCORE(1)}.</para>
110                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
111                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
112                         
113                 </description>
114         </application>
115         <application name="SpeechDeactivateGrammar" language="en_US">
116                 <synopsis>
117                         Deactivate a grammar.
118                 </synopsis>
119                 <syntax>
120                         <parameter name="grammar_name" required="true">
121                                 <para>The grammar name to deactivate</para>
122                         </parameter>
123                 </syntax>
124                 <description>
125                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
126                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
127                 </description>
128         </application>
129         <application name="SpeechProcessingSound" language="en_US">
130                 <synopsis>
131                         Change background processing sound.
132                 </synopsis>
133                 <syntax>
134                         <parameter name="sound_file" required="true" />
135                 </syntax>
136                 <description>
137                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
138                         processing and working to get results.</para>
139                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
140                 </description>
141         </application>
142         <application name="SpeechDestroy" language="en_US">
143                 <synopsis>
144                         End speech recognition.
145                 </synopsis>
146                 <syntax />
147                 <description>
148                         <para>This destroys the information used by all the other speech recognition applications.
149                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
150                         again before calling any other application.</para>
151                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
152                 </description>
153         </application>
154         <application name="SpeechLoadGrammar" language="en_US">
155                 <synopsis>
156                         Load a grammar.
157                 </synopsis>
158                 <syntax>
159                         <parameter name="grammar_name" required="true" />
160                         <parameter name="path" required="true" />
161                 </syntax>
162                 <description>
163                         <para>Load a grammar only on the channel, not globally.</para>
164                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
165                 </description>
166         </application>
167         <application name="SpeechUnloadGrammar" language="en_US">
168                 <synopsis>
169                         Unload a grammar.
170                 </synopsis>
171                 <syntax>
172                         <parameter name="grammar_name" required="true" />
173                 </syntax>
174                 <description>
175                         <para>Unload a grammar.</para>
176                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
177                 </description>
178         </application>
179         <function name="SPEECH_SCORE" language="en_US">
180                 <synopsis>
181                         Gets the confidence score of a result.
182                 </synopsis>
183                 <syntax argsep="/">
184                         <parameter name="nbest_number" />
185                         <parameter name="result_number" required="true" />
186                 </syntax>
187                 <description>
188                         <para>Gets the confidence score of a result.</para>
189                 </description>
190         </function>
191         <function name="SPEECH_TEXT" language="en_US">
192                 <synopsis>
193                         Gets the recognized text of a result.
194                 </synopsis>
195                 <syntax argsep="/">
196                         <parameter name="nbest_number" />
197                         <parameter name="result_number" required="true" />
198                 </syntax>
199                 <description>
200                         <para>Gets the recognized text of a result.</para>
201                 </description>
202         </function>
203         <function name="SPEECH_GRAMMAR" language="en_US">
204                 <synopsis>
205                         Gets the matched grammar of a result if available.
206                 </synopsis>
207                 <syntax argsep="/">
208                         <parameter name="nbest_number" />
209                         <parameter name="result_number" required="true" />
210                 </syntax>
211                 <description>
212                         <para>Gets the matched grammar of a result if available.</para>
213                 </description>
214         </function>
215         <function name="SPEECH_ENGINE" language="en_US">
216                 <synopsis>
217                         Get or change a speech engine specific attribute.
218                 </synopsis>
219                 <syntax>
220                         <parameter name="name" required="true" />
221                 </syntax>
222                 <description>
223                         <para>Gets or changes a speech engine specific attribute.</para>
224                 </description>
225         </function>
226         <function name="SPEECH_RESULTS_TYPE" language="en_US">
227                 <synopsis>
228                         Sets the type of results that will be returned.
229                 </synopsis>
230                 <syntax />
231                 <description>
232                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
233                 </description>
234         </function>
235         <function name="SPEECH" language="en_US">
236                 <synopsis>
237                         Gets information about speech recognition results.
238                 </synopsis>
239                 <syntax>
240                         <parameter name="argument" required="true">
241                                 <enumlist>
242                                         <enum name="status">
243                                                 <para>Returns <literal>1</literal> upon speech object existing,
244                                                 or <literal>0</literal> if not</para>
245                                         </enum>
246                                         <enum name="spoke">
247                                                 <para>Returns <literal>1</literal> if speaker spoke,
248                                                 or <literal>0</literal> if not</para>
249                                         </enum>
250                                         <enum name="results">
251                                                 <para>Returns number of results that were recognized.</para>
252                                         </enum>
253                                 </enumlist>
254                         </parameter>
255                 </syntax>
256                 <description>
257                         <para>Gets information about speech recognition results.</para>
258                 </description>
259         </function>
260  ***/
261
/*!
 * \brief Destroy callback registered in speech_datastore
 *
 * \param data The speech structure stored in the datastore (may be NULL)
 *
 * Releases the speech structure through ast_speech_destroy(). A NULL
 * pointer is ignored.
 */
static void destroy_callback(void *data)
{
	struct ast_speech *speech = data;

	if (speech != NULL) {
		/* Deallocate now */
		ast_speech_destroy(speech);
	}
}
276
277 /*! \brief Static structure for datastore information */
278 static const struct ast_datastore_info speech_datastore = {
279         .type = "speech",
280         .destroy = destroy_callback
281 };
282
283 /*! \brief Helper function used to find the speech structure attached to a channel */
284 static struct ast_speech *find_speech(struct ast_channel *chan)
285 {
286         struct ast_speech *speech = NULL;
287         struct ast_datastore *datastore = NULL;
288
289         if (!chan) {
290                 return NULL;
291         }
292
293         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
294         if (datastore == NULL) {
295                 return NULL;
296         }
297         speech = datastore->data;
298
299         return speech;
300 }
301
302 /* Helper function to find a specific speech recognition result by number and nbest alternative */
303 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
304 {
305         struct ast_speech_result *result = results;
306         char *tmp = NULL;
307         int nbest_num = 0, wanted_num = 0, i = 0;
308
309         if (!result) {
310                 return NULL;
311         }
312
313         if ((tmp = strchr(result_num, '/'))) {
314                 *tmp++ = '\0';
315                 nbest_num = atoi(result_num);
316                 wanted_num = atoi(tmp);
317         } else {
318                 wanted_num = atoi(result_num);
319         }
320
321         do {
322                 if (result->nbest_num != nbest_num)
323                         continue;
324                 if (i == wanted_num)
325                         break;
326                 i++;
327         } while ((result = AST_LIST_NEXT(result, list)));
328
329         return result;
330 }
331
332 /*! \brief SPEECH_SCORE() Dialplan Function */
333 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
334                        char *buf, size_t len)
335 {
336         struct ast_speech_result *result = NULL;
337         struct ast_speech *speech = find_speech(chan);
338         char tmp[128] = "";
339
340         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
341                 return -1;
342         }
343         
344         snprintf(tmp, sizeof(tmp), "%d", result->score);
345         
346         ast_copy_string(buf, tmp, len);
347
348         return 0;
349 }
350
351 static struct ast_custom_function speech_score_function = {
352         .name = "SPEECH_SCORE",
353         .read = speech_score,
354         .write = NULL,
355 };
356
357 /*! \brief SPEECH_TEXT() Dialplan Function */
358 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
359                         char *buf, size_t len)
360 {
361         struct ast_speech_result *result = NULL;
362         struct ast_speech *speech = find_speech(chan);
363
364         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
365                 return -1;
366         }
367
368         if (result->text != NULL) {
369                 ast_copy_string(buf, result->text, len);
370         } else {
371                 buf[0] = '\0';
372         }
373
374         return 0;
375 }
376
377 static struct ast_custom_function speech_text_function = {
378         .name = "SPEECH_TEXT",
379         .read = speech_text,
380         .write = NULL,
381 };
382
383 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
384 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
385                         char *buf, size_t len)
386 {
387         struct ast_speech_result *result = NULL;
388         struct ast_speech *speech = find_speech(chan);
389
390         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
391                 return -1;
392         }
393
394         if (result->grammar != NULL) {
395                 ast_copy_string(buf, result->grammar, len);
396         } else {
397                 buf[0] = '\0';
398         }
399
400         return 0;
401 }
402
403 static struct ast_custom_function speech_grammar_function = {
404         .name = "SPEECH_GRAMMAR",
405         .read = speech_grammar,
406         .write = NULL,
407 };
408
/*!
 * \brief SPEECH_ENGINE() Dialplan Set Function
 *
 * \param chan Channel whose speech structure is modified
 * \param cmd Unused
 * \param data Name of the engine attribute
 * \param value New value for the attribute
 *
 * \retval 0 once the change was handed to ast_speech_change() (its result is not inspected)
 * \retval -1 if \a data is NULL or no speech structure is attached
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
	struct ast_speech *speech = find_speech(chan);

	if (!data || !speech) {
		return -1;
	}

	ast_speech_change(speech, data, value);
	return 0;
}
422
/*!
 * \brief SPEECH_ENGINE() Dialplan Get Function
 *
 * \param chan Channel whose speech structure is queried
 * \param cmd Unused
 * \param data Name of the engine attribute
 * \param buf Output buffer passed through to ast_speech_get_setting()
 * \param len Size of \a buf
 *
 * \return -1 if \a data is NULL or no speech structure is attached,
 *         otherwise whatever ast_speech_get_setting() returns.
 */
static int speech_engine_read(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t len)
{
	struct ast_speech *speech = find_speech(chan);

	if (data == NULL || speech == NULL) {
		return -1;
	}

	return ast_speech_get_setting(speech, data, buf, len);
}
434
435 static struct ast_custom_function speech_engine_function = {
436         .name = "SPEECH_ENGINE",
437         .read = speech_engine_read,
438         .write = speech_engine_write,
439 };
440
441 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
442 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
443 {
444         struct ast_speech *speech = find_speech(chan);
445
446         if (data == NULL || speech == NULL)
447                 return -1;
448
449         if (!strcasecmp(value, "normal"))
450                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
451         else if (!strcasecmp(value, "nbest"))
452                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
453
454         return 0;
455 }
456
457 static struct ast_custom_function speech_results_type_function = {
458         .name = "SPEECH_RESULTS_TYPE",
459         .read = NULL,
460         .write = speech_results_type_write,
461 };
462
463 /*! \brief SPEECH() Dialplan Function */
464 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
465                         char *buf, size_t len)
466 {
467         int results = 0;
468         struct ast_speech_result *result = NULL;
469         struct ast_speech *speech = find_speech(chan);
470         char tmp[128] = "";
471
472         /* Now go for the various options */
473         if (!strcasecmp(data, "status")) {
474                 if (speech != NULL)
475                         ast_copy_string(buf, "1", len);
476                 else
477                         ast_copy_string(buf, "0", len);
478                 return 0;
479         }
480
481         /* Make sure we have a speech structure for everything else */
482         if (speech == NULL) {
483                 return -1;
484         }
485
486         /* Check to see if they are checking for silence */
487         if (!strcasecmp(data, "spoke")) {
488                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
489                         ast_copy_string(buf, "1", len);
490                 else
491                         ast_copy_string(buf, "0", len);
492         } else if (!strcasecmp(data, "results")) {
493                 /* Count number of results */
494                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
495                         results++;
496                 snprintf(tmp, sizeof(tmp), "%d", results);
497                 ast_copy_string(buf, tmp, len);
498         } else {
499                 buf[0] = '\0';
500         }
501
502         return 0;
503 }
504
505 static struct ast_custom_function speech_function = {
506         .name = "SPEECH",
507         .read = speech_read,
508         .write = NULL,
509 };
510
511
512
513 /*! \brief SpeechCreate() Dialplan Application */
514 static int speech_create(struct ast_channel *chan, const char *data)
515 {
516         struct ast_speech *speech = NULL;
517         struct ast_datastore *datastore = NULL;
518
519         /* Request a speech object */
520         speech = ast_speech_new(data, ast_channel_nativeformats(chan));
521         if (speech == NULL) {
522                 /* Not available */
523                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
524                 return 0;
525         }
526
527         datastore = ast_datastore_alloc(&speech_datastore, NULL);
528         if (datastore == NULL) {
529                 ast_speech_destroy(speech);
530                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
531                 return 0;
532         }
533         pbx_builtin_setvar_helper(chan, "ERROR", NULL);
534         datastore->data = speech;
535         ast_channel_datastore_add(chan, datastore);
536
537         return 0;
538 }
539
540 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
541 static int speech_load(struct ast_channel *chan, const char *vdata)
542 {
543         int res = 0;
544         struct ast_speech *speech = find_speech(chan);
545         char *data;
546         AST_DECLARE_APP_ARGS(args,
547                 AST_APP_ARG(grammar);
548                 AST_APP_ARG(path);
549         );
550
551         data = ast_strdupa(vdata);
552         AST_STANDARD_APP_ARGS(args, data);
553
554         if (speech == NULL)
555                 return -1;
556
557         if (args.argc != 2)
558                 return -1;
559
560         /* Load the grammar locally on the object */
561         res = ast_speech_grammar_load(speech, args.grammar, args.path);
562
563         return res;
564 }
565
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel whose speech structure is used
 * \param data Name of the grammar to unload
 *
 * \return -1 if no speech structure is attached, otherwise the result of
 *         ast_speech_grammar_unload().
 */
static int speech_unload(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_unload(speech, data);
}
580
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel whose speech structure is used
 * \param data Name of the grammar to deactivate
 *
 * \return -1 if no speech structure is attached, otherwise the result of
 *         ast_speech_grammar_deactivate().
 */
static int speech_deactivate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_deactivate(speech, data);
}
595
/*!
 * \brief SpeechActivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel whose speech structure is used
 * \param data Name of the grammar to activate
 *
 * \return -1 if no speech structure is attached, otherwise the result of
 *         ast_speech_grammar_activate().
 */
static int speech_activate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_activate(speech, data);
}
610
/*!
 * \brief SpeechStart() Dialplan Application
 *
 * \param chan Channel whose speech structure is started
 * \param data Unused
 *
 * \retval 0 after ast_speech_start() has been called
 * \retval -1 if no speech structure is attached
 */
static int speech_start(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	ast_speech_start(speech);
	return 0;
}
624
625 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
626 static int speech_processing_sound(struct ast_channel *chan, const char *data)
627 {
628         int res = 0;
629         struct ast_speech *speech = find_speech(chan);
630
631         if (speech == NULL)
632                 return -1;
633
634         if (speech->processing_sound != NULL) {
635                 ast_free(speech->processing_sound);
636                 speech->processing_sound = NULL;
637         }
638
639         speech->processing_sound = ast_strdup(data);
640
641         return res;
642 }
643
/*!
 * \brief Helper function used by speech_background to playback a soundfile
 *
 * \param chan Channel to play the file on
 * \param filename Sound file to open
 * \param preflang Preferred language passed to ast_openstream()
 *
 * \retval 0 once playback has been started (the result of ast_playstream() is not inspected)
 * \retval -1 if the stream could not be opened or applied to the channel
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
	struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

	if (stream == NULL || ast_applystream(chan, stream) != 0) {
		return -1;
	}

	ast_playstream(stream);

	return 0;
}
659
660 enum {
661         SB_OPT_NOANSWER = (1 << 0),
662 };
663
664 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
665         AST_APP_OPTION('n', SB_OPT_NOANSWER),
666 END_OPTIONS );
667
668 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
669 static int speech_background(struct ast_channel *chan, const char *data)
670 {
671         unsigned int timeout = 0;
672         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
673         struct ast_speech *speech = find_speech(chan);
674         struct ast_frame *f = NULL;
675         RAII_VAR(struct ast_format *, oldreadformat, NULL, ao2_cleanup);
676         char dtmf[AST_MAX_EXTENSION] = "";
677         struct timeval start = { 0, 0 }, current;
678         struct ast_datastore *datastore = NULL;
679         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
680         const char *tmp2 = NULL;
681         struct ast_flags options = { 0 };
682         AST_DECLARE_APP_ARGS(args,
683                 AST_APP_ARG(soundfile);
684                 AST_APP_ARG(timeout);
685                 AST_APP_ARG(options);
686         );
687
688         parse = ast_strdupa(data);
689         AST_STANDARD_APP_ARGS(args, parse);
690
691         if (speech == NULL)
692                 return -1;
693
694         if (!ast_strlen_zero(args.options)) {
695                 char *options_buf = ast_strdupa(args.options);
696                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
697         }
698
699         /* If channel is not already answered, then answer it */
700         if (ast_channel_state(chan) != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
701                 && ast_answer(chan)) {
702                         return -1;
703         }
704
705         /* Record old read format */
706         oldreadformat = ao2_bump(ast_channel_readformat(chan));
707
708         /* Change read format to be signed linear */
709         if (ast_set_read_format(chan, speech->format))
710                 return -1;
711
712         if (!ast_strlen_zero(args.soundfile)) {
713                 /* Yay sound file */
714                 filename_tmp = ast_strdupa(args.soundfile);
715                 if (!ast_strlen_zero(args.timeout)) {
716                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
717                                 timeout = -1;
718                 } else
719                         timeout = 0;
720         }
721
722         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
723         ast_channel_lock(chan);
724         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
725                 max_dtmf_len = atoi(tmp2);
726         }
727         
728         /* See if a terminator is specified */
729         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
730                 if (ast_strlen_zero(tmp2))
731                         dtmf_terminator = '\0';
732                 else
733                         dtmf_terminator = tmp2[0];
734         }
735         ast_channel_unlock(chan);
736
737         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
738         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
739                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
740                 ast_speech_start(speech);
741         }
742
743         /* Ensure no streams are currently running */
744         ast_stopstream(chan);
745
746         /* Okay it's streaming so go into a loop grabbing frames! */
747         while (done == 0) {
748                 /* If the filename is null and stream is not running, start up a new sound file */
749                 if (!quieted && (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) && (filename = strsep(&filename_tmp, "&"))) {
750                         /* Discard old stream information */
751                         ast_stopstream(chan);
752                         /* Start new stream */
753                         speech_streamfile(chan, filename, ast_channel_language(chan));
754                 }
755
756                 /* Run scheduled stuff */
757                 ast_sched_runq(ast_channel_sched(chan));
758
759                 /* Yay scheduling */
760                 res = ast_sched_wait(ast_channel_sched(chan));
761                 if (res < 0)
762                         res = 1000;
763
764                 /* If there is a frame waiting, get it - if not - oh well */
765                 if (ast_waitfor(chan, res) > 0) {
766                         f = ast_read(chan);
767                         if (f == NULL) {
768                                 /* The channel has hung up most likely */
769                                 done = 3;
770                                 break;
771                         }
772                 }
773
774                 /* Do timeout check (shared between audio/dtmf) */
775                 if ((!quieted || strlen(dtmf)) && started == 1) {
776                         current = ast_tvnow();
777                         if ((ast_tvdiff_ms(current, start)) >= timeout) {
778                                 done = 1;
779                                 if (f)
780                                         ast_frfree(f);
781                                 break;
782                         }
783                 }
784
785                 /* Do checks on speech structure to see if it's changed */
786                 ast_mutex_lock(&speech->lock);
787                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
788                         if (ast_channel_stream(chan))
789                                 ast_stopstream(chan);
790                         ast_clear_flag(speech, AST_SPEECH_QUIET);
791                         quieted = 1;
792                 }
793                 /* Check state so we can see what to do */
794                 switch (speech->state) {
795                 case AST_SPEECH_STATE_READY:
796                         /* If audio playback has stopped do a check for timeout purposes */
797                         if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL)
798                                 ast_stopstream(chan);
799                         if (!quieted && ast_channel_stream(chan) == NULL && timeout && started == 0 && !filename_tmp) {
800                                 if (timeout == -1) {
801                                         done = 1;
802                                         if (f)
803                                                 ast_frfree(f);
804                                         break;
805                                 }
806                                 start = ast_tvnow();
807                                 started = 1;
808                         }
809                         /* Write audio frame out to speech engine if no DTMF has been received */
810                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
811                                 ast_speech_write(speech, f->data.ptr, f->datalen);
812                         }
813                         break;
814                 case AST_SPEECH_STATE_WAIT:
815                         /* Cue up waiting sound if not already playing */
816                         if (!strlen(dtmf)) {
817                                 if (ast_channel_stream(chan) == NULL) {
818                                         if (speech->processing_sound != NULL) {
819                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
820                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
821                                                 }
822                                         }
823                                 } else if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) {
824                                         ast_stopstream(chan);
825                                         if (speech->processing_sound != NULL) {
826                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
827                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
828                                                 }
829                                         }
830                                 }
831                         }
832                         break;
833                 case AST_SPEECH_STATE_DONE:
834                         /* Now that we are done... let's switch back to not ready state */
835                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
836                         if (!strlen(dtmf)) {
837                                 /* Copy to speech structure the results, if available */
838                                 speech->results = ast_speech_results_get(speech);
839                                 /* Break out of our background too */
840                                 done = 1;
841                                 /* Stop audio playback */
842                                 if (ast_channel_stream(chan) != NULL) {
843                                         ast_stopstream(chan);
844                                 }
845                         }
846                         break;
847                 default:
848                         break;
849                 }
850                 ast_mutex_unlock(&speech->lock);
851
852                 /* Deal with other frame types */
853                 if (f != NULL) {
854                         /* Free the frame we received */
855                         switch (f->frametype) {
856                         case AST_FRAME_DTMF:
857                                 if (dtmf_terminator != '\0' && f->subclass.integer == dtmf_terminator) {
858                                         done = 1;
859                                 } else {
860                                         quieted = 1;
861                                         if (ast_channel_stream(chan) != NULL) {
862                                                 ast_stopstream(chan);
863                                         }
864                                         if (!started) {
865                                                 /* Change timeout to be 5 seconds for DTMF input */
866                                                 timeout = (ast_channel_pbx(chan) && ast_channel_pbx(chan)->dtimeoutms) ? ast_channel_pbx(chan)->dtimeoutms : 5000;
867                                                 started = 1;
868                                         }
869                                         start = ast_tvnow();
870                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass.integer);
871                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
872                                         /* If the maximum length of the DTMF has been reached, stop now */
873                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
874                                                 done = 1;
875                                 }
876                                 break;
877                         case AST_FRAME_CONTROL:
878                                 switch (f->subclass.integer) {
879                                 case AST_CONTROL_HANGUP:
880                                         /* Since they hung up we should destroy the speech structure */
881                                         done = 3;
882                                 default:
883                                         break;
884                                 }
885                         default:
886                                 break;
887                         }
888                         ast_frfree(f);
889                         f = NULL;
890                 }
891         }
892
893         if (!ast_strlen_zero(dtmf)) {
894                 /* We sort of make a results entry */
895                 speech->results = ast_calloc(1, sizeof(*speech->results));
896                 if (speech->results != NULL) {
897                         ast_speech_dtmf(speech, dtmf);
898                         speech->results->score = 1000;
899                         speech->results->text = ast_strdup(dtmf);
900                         speech->results->grammar = ast_strdup("dtmf");
901                 }
902                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
903         }
904
905         /* See if it was because they hung up */
906         if (done == 3) {
907                 /* Destroy speech structure */
908                 ast_speech_destroy(speech);
909                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
910                 if (datastore != NULL)
911                         ast_channel_datastore_remove(chan, datastore);
912         } else {
913                 /* Channel is okay so restore read format */
914                 ast_set_read_format(chan, oldreadformat);
915         }
916
917         return 0;
918 }
919
920
921 /*! \brief SpeechDestroy() Dialplan Application */
922 static int speech_destroy(struct ast_channel *chan, const char *data)
923 {
924         int res = 0;
925         struct ast_speech *speech = find_speech(chan);
926         struct ast_datastore *datastore = NULL;
927
928         if (speech == NULL)
929                 return -1;
930
931         /* Destroy speech structure */
932         ast_speech_destroy(speech);
933
934         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
935         if (datastore != NULL) {
936                 ast_channel_datastore_remove(chan, datastore);
937         }
938
939         return res;
940 }
941
942 static int unload_module(void)
943 {
944         int res = 0;
945
946         res = ast_unregister_application("SpeechCreate");
947         res |= ast_unregister_application("SpeechLoadGrammar");
948         res |= ast_unregister_application("SpeechUnloadGrammar");
949         res |= ast_unregister_application("SpeechActivateGrammar");
950         res |= ast_unregister_application("SpeechDeactivateGrammar");
951         res |= ast_unregister_application("SpeechStart");
952         res |= ast_unregister_application("SpeechBackground");
953         res |= ast_unregister_application("SpeechDestroy");
954         res |= ast_unregister_application("SpeechProcessingSound");
955         res |= ast_custom_function_unregister(&speech_function);
956         res |= ast_custom_function_unregister(&speech_score_function);
957         res |= ast_custom_function_unregister(&speech_text_function);
958         res |= ast_custom_function_unregister(&speech_grammar_function);
959         res |= ast_custom_function_unregister(&speech_engine_function);
960         res |= ast_custom_function_unregister(&speech_results_type_function);
961
962         return res;     
963 }
964
965 static int load_module(void)
966 {
967         int res = 0;
968
969         res = ast_register_application_xml("SpeechCreate", speech_create);
970         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
971         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
972         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
973         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
974         res |= ast_register_application_xml("SpeechStart", speech_start);
975         res |= ast_register_application_xml("SpeechBackground", speech_background);
976         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
977         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
978         res |= ast_custom_function_register(&speech_function);
979         res |= ast_custom_function_register(&speech_score_function);
980         res |= ast_custom_function_register(&speech_text_function);
981         res |= ast_custom_function_register(&speech_grammar_function);
982         res |= ast_custom_function_register(&speech_engine_function);
983         res |= ast_custom_function_register(&speech_results_type_function);
984
985         return res;
986 }
987
988 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Dialplan Speech Applications",
989                 .load = load_module,
990                 .unload = unload_module,
991                 .nonoptreq = "res_speech",
992                 );