e9ca63ea9177a7e6995f7d454fda01c6b5555305
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30         <depend>res_speech</depend>
31  ***/
32
33 #include "asterisk.h"
34
35 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
36
37 #include "asterisk/file.h"
38 #include "asterisk/channel.h"
39 #include "asterisk/pbx.h"
40 #include "asterisk/module.h"
41 #include "asterisk/lock.h"
42 #include "asterisk/app.h"
43 #include "asterisk/speech.h"
44
45 /*** DOCUMENTATION
46         <application name="SpeechCreate" language="en_US">
47                 <synopsis>
48                         Create a Speech Structure.
49                 </synopsis>
50                 <syntax>
51                         <parameter name="engine_name" required="true" />
52                 </syntax>
53                 <description>
54                         <para>This application creates information to be used by all the other applications.
55                         It must be called before doing any speech recognition activities such as activating a grammar.
56                         It takes the engine name to use as the argument; if not specified, the default engine will be used.</para>
57                         <para>Sets the ERROR channel variable to 1 if the engine cannot be used.</para>
58                 </description>
59         </application>
60         <application name="SpeechActivateGrammar" language="en_US">
61                 <synopsis>
62                         Activate a grammar.
63                 </synopsis>
64                 <syntax>
65                         <parameter name="grammar_name" required="true" />
66                 </syntax>
67                 <description>
68                         <para>This activates the specified grammar to be recognized by the engine.
69                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
70                         in the dialplan. The grammar name is the only argument to this application.</para>
71                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
72                 </description>
73         </application>
74         <application name="SpeechStart" language="en_US">
75                 <synopsis>
76                         Start recognizing voice in the audio stream.
77                 </synopsis>
78                 <syntax />
79                 <description>
80                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
81                         fed to it.</para>
82                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
83                 </description>
84         </application>
85         <application name="SpeechBackground" language="en_US">
86                 <synopsis>
87                         Play a sound file and wait for speech to be recognized.
88                 </synopsis>
89                 <syntax>
90                         <parameter name="sound_file" required="true" />
91                         <parameter name="timeout">
92                                 <para>Timeout integer in seconds. Note the timeout will only start
93                                 once the sound file has stopped playing.</para>
94                         </parameter>
95                         <parameter name="options">
96                                 <optionlist>
97                                         <option name="n">
98                                                 <para>Don't answer the channel if it has not already been answered.</para>
99                                         </option>
100                                 </optionlist>
101                         </parameter>
102                 </syntax>
103                 <description>
104                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
105                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
106                         the speech recognition engine is working. Once results are available the application returns and results
107                         (score and text) are available using dialplan functions.</para>
108                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)}
109                         and ${SPEECH_SCORE(1)}.</para>
110                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
111                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
112                         
113                 </description>
114         </application>
115         <application name="SpeechDeactivateGrammar" language="en_US">
116                 <synopsis>
117                         Deactivate a grammar.
118                 </synopsis>
119                 <syntax>
120                         <parameter name="grammar_name" required="true">
121                                 <para>The grammar name to deactivate</para>
122                         </parameter>
123                 </syntax>
124                 <description>
125                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
126                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
127                 </description>
128         </application>
129         <application name="SpeechProcessingSound" language="en_US">
130                 <synopsis>
131                         Change background processing sound.
132                 </synopsis>
133                 <syntax>
134                         <parameter name="sound_file" required="true" />
135                 </syntax>
136                 <description>
137                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
138                         processing and working to get results.</para>
139                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
140                 </description>
141         </application>
142         <application name="SpeechDestroy" language="en_US">
143                 <synopsis>
144                         End speech recognition.
145                 </synopsis>
146                 <syntax />
147                 <description>
148                         <para>This destroys the information used by all the other speech recognition applications.
149                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
150                         again before calling any other application.</para>
151                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
152                 </description>
153         </application>
154         <application name="SpeechLoadGrammar" language="en_US">
155                 <synopsis>
156                         Load a grammar.
157                 </synopsis>
158                 <syntax>
159                         <parameter name="grammar_name" required="true" />
160                         <parameter name="path" required="true" />
161                 </syntax>
162                 <description>
163                         <para>Load a grammar only on the channel, not globally.</para>
164                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
165                 </description>
166         </application>
167         <application name="SpeechUnloadGrammar" language="en_US">
168                 <synopsis>
169                         Unload a grammar.
170                 </synopsis>
171                 <syntax>
172                         <parameter name="grammar_name" required="true" />
173                 </syntax>
174                 <description>
175                         <para>Unload a grammar.</para>
176                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
177                 </description>
178         </application>
179         <function name="SPEECH_SCORE" language="en_US">
180                 <synopsis>
181                         Gets the confidence score of a result.
182                 </synopsis>
183                 <syntax argsep="/">
184                         <parameter name="nbest_number" />
185                         <parameter name="result_number" required="true" />
186                 </syntax>
187                 <description>
188                         <para>Gets the confidence score of a result.</para>
189                 </description>
190         </function>
191         <function name="SPEECH_TEXT" language="en_US">
192                 <synopsis>
193                         Gets the recognized text of a result.
194                 </synopsis>
195                 <syntax argsep="/">
196                         <parameter name="nbest_number" />
197                         <parameter name="result_number" required="true" />
198                 </syntax>
199                 <description>
200                         <para>Gets the recognized text of a result.</para>
201                 </description>
202         </function>
203         <function name="SPEECH_GRAMMAR" language="en_US">
204                 <synopsis>
205                         Gets the matched grammar of a result if available.
206                 </synopsis>
207                 <syntax argsep="/">
208                         <parameter name="nbest_number" />
209                         <parameter name="result_number" required="true" />
210                 </syntax>
211                 <description>
212                         <para>Gets the matched grammar of a result if available.</para>
213                 </description>
214         </function>
215         <function name="SPEECH_ENGINE" language="en_US">
216                 <synopsis>
217                         Get or change a speech engine specific attribute.
218                 </synopsis>
219                 <syntax>
220                         <parameter name="name" required="true" />
221                 </syntax>
222                 <description>
223                         <para>Changes a speech engine specific attribute.</para>
224                 </description>
225         </function>
226         <function name="SPEECH_RESULTS_TYPE" language="en_US">
227                 <synopsis>
228                         Sets the type of results that will be returned.
229                 </synopsis>
230                 <syntax />
231                 <description>
232                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
233                 </description>
234         </function>
235         <function name="SPEECH" language="en_US">
236                 <synopsis>
237                         Gets information about speech recognition results.
238                 </synopsis>
239                 <syntax>
240                         <parameter name="argument" required="true">
241                                 <enumlist>
242                                         <enum name="status">
243                                                 <para>Returns <literal>1</literal> upon speech object existing,
244                                                 or <literal>0</literal> if not</para>
245                                         </enum>
246                                         <enum name="spoke">
247                                                 <para>Returns <literal>1</literal> if speaker spoke,
248                                                 or <literal>0</literal> if not</para>
249                                         </enum>
250                                         <enum name="results">
251                                                 <para>Returns number of results that were recognized.</para>
252                                         </enum>
253                                 </enumlist>
254                         </parameter>
255                 </syntax>
256                 <description>
257                         <para>Gets information about speech recognition results.</para>
258                 </description>
259         </function>
260  ***/
261
/*!
 * \brief Datastore destroy hook that releases an attached speech structure.
 *
 * \param data Opaque pointer handed over by the datastore; it is treated as a
 *             struct ast_speech. A NULL pointer is accepted and ignored.
 */
static void destroy_callback(void *data)
{
	struct ast_speech *speech = data;

	if (speech) {
		/* Deallocate now */
		ast_speech_destroy(speech);
	}
}
276
277 /*! \brief Static structure for datastore information */
278 static const struct ast_datastore_info speech_datastore = {
279         .type = "speech",
280         .destroy = destroy_callback
281 };
282
283 /*! \brief Helper function used to find the speech structure attached to a channel */
284 static struct ast_speech *find_speech(struct ast_channel *chan)
285 {
286         struct ast_speech *speech = NULL;
287         struct ast_datastore *datastore = NULL;
288
289         if (!chan) {
290                 return NULL;
291         }
292
293         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
294         if (datastore == NULL) {
295                 return NULL;
296         }
297         speech = datastore->data;
298
299         return speech;
300 }
301
302 /* Helper function to find a specific speech recognition result by number and nbest alternative */
303 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
304 {
305         struct ast_speech_result *result = results;
306         char *tmp = NULL;
307         int nbest_num = 0, wanted_num = 0, i = 0;
308
309         if (!result) {
310                 return NULL;
311         }
312
313         if ((tmp = strchr(result_num, '/'))) {
314                 *tmp++ = '\0';
315                 nbest_num = atoi(result_num);
316                 wanted_num = atoi(tmp);
317         } else {
318                 wanted_num = atoi(result_num);
319         }
320
321         do {
322                 if (result->nbest_num != nbest_num)
323                         continue;
324                 if (i == wanted_num)
325                         break;
326                 i++;
327         } while ((result = AST_LIST_NEXT(result, list)));
328
329         return result;
330 }
331
332 /*! \brief SPEECH_SCORE() Dialplan Function */
333 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
334                        char *buf, size_t len)
335 {
336         struct ast_speech_result *result = NULL;
337         struct ast_speech *speech = find_speech(chan);
338         char tmp[128] = "";
339
340         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
341                 return -1;
342         }
343         
344         snprintf(tmp, sizeof(tmp), "%d", result->score);
345         
346         ast_copy_string(buf, tmp, len);
347
348         return 0;
349 }
350
351 static struct ast_custom_function speech_score_function = {
352         .name = "SPEECH_SCORE",
353         .read = speech_score,
354         .write = NULL,
355 };
356
357 /*! \brief SPEECH_TEXT() Dialplan Function */
358 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
359                         char *buf, size_t len)
360 {
361         struct ast_speech_result *result = NULL;
362         struct ast_speech *speech = find_speech(chan);
363
364         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
365                 return -1;
366         }
367
368         if (result->text != NULL) {
369                 ast_copy_string(buf, result->text, len);
370         } else {
371                 buf[0] = '\0';
372         }
373
374         return 0;
375 }
376
377 static struct ast_custom_function speech_text_function = {
378         .name = "SPEECH_TEXT",
379         .read = speech_text,
380         .write = NULL,
381 };
382
383 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
384 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
385                         char *buf, size_t len)
386 {
387         struct ast_speech_result *result = NULL;
388         struct ast_speech *speech = find_speech(chan);
389
390         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
391                 return -1;
392         }
393
394         if (result->grammar != NULL) {
395                 ast_copy_string(buf, result->grammar, len);
396         } else {
397                 buf[0] = '\0';
398         }
399
400         return 0;
401 }
402
403 static struct ast_custom_function speech_grammar_function = {
404         .name = "SPEECH_GRAMMAR",
405         .read = speech_grammar,
406         .write = NULL,
407 };
408
/*!
 * \brief SPEECH_ENGINE() Dialplan Set Function
 *
 * \param chan  Channel whose speech structure is changed.
 * \param cmd   Function name (unused).
 * \param data  Name of the engine attribute to change.
 * \param value New value, handed to ast_speech_change() unmodified.
 *
 * \retval 0  the change request was passed to the speech engine; the outcome
 *            of ast_speech_change() itself is not reported
 * \retval -1 no attribute name or no speech structure on the channel
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
	struct ast_speech *speech = find_speech(chan);

	if (!data || !speech) {
		return -1;
	}

	ast_speech_change(speech, data, value);

	return 0;
}
422
/*!
 * \brief SPEECH_ENGINE() Dialplan Get Function
 *
 * \param chan Channel whose speech structure is queried.
 * \param cmd  Function name (unused).
 * \param data Name of the engine attribute to read.
 * \param buf  Output buffer handed to ast_speech_get_setting().
 * \param len  Size of \a buf.
 *
 * \return -1 when there is no attribute name or no speech structure on the
 *         channel, otherwise whatever ast_speech_get_setting() returns.
 */
static int speech_engine_read(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t len)
{
	struct ast_speech *speech = find_speech(chan);

	if (data == NULL || speech == NULL) {
		return -1;
	}

	return ast_speech_get_setting(speech, data, buf, len);
}
434
435 static struct ast_custom_function speech_engine_function = {
436         .name = "SPEECH_ENGINE",
437         .read = speech_engine_read,
438         .write = speech_engine_write,
439 };
440
441 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
442 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
443 {
444         struct ast_speech *speech = find_speech(chan);
445
446         if (data == NULL || speech == NULL)
447                 return -1;
448
449         if (!strcasecmp(value, "normal"))
450                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
451         else if (!strcasecmp(value, "nbest"))
452                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
453
454         return 0;
455 }
456
457 static struct ast_custom_function speech_results_type_function = {
458         .name = "SPEECH_RESULTS_TYPE",
459         .read = NULL,
460         .write = speech_results_type_write,
461 };
462
463 /*! \brief SPEECH() Dialplan Function */
464 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
465                         char *buf, size_t len)
466 {
467         int results = 0;
468         struct ast_speech_result *result = NULL;
469         struct ast_speech *speech = find_speech(chan);
470         char tmp[128] = "";
471
472         /* Now go for the various options */
473         if (!strcasecmp(data, "status")) {
474                 if (speech != NULL)
475                         ast_copy_string(buf, "1", len);
476                 else
477                         ast_copy_string(buf, "0", len);
478                 return 0;
479         }
480
481         /* Make sure we have a speech structure for everything else */
482         if (speech == NULL) {
483                 return -1;
484         }
485
486         /* Check to see if they are checking for silence */
487         if (!strcasecmp(data, "spoke")) {
488                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
489                         ast_copy_string(buf, "1", len);
490                 else
491                         ast_copy_string(buf, "0", len);
492         } else if (!strcasecmp(data, "results")) {
493                 /* Count number of results */
494                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
495                         results++;
496                 snprintf(tmp, sizeof(tmp), "%d", results);
497                 ast_copy_string(buf, tmp, len);
498         } else {
499                 buf[0] = '\0';
500         }
501
502         return 0;
503 }
504
505 static struct ast_custom_function speech_function = {
506         .name = "SPEECH",
507         .read = speech_read,
508         .write = NULL,
509 };
510
511
512
513 /*! \brief SpeechCreate() Dialplan Application */
514 static int speech_create(struct ast_channel *chan, const char *data)
515 {
516         struct ast_speech *speech = NULL;
517         struct ast_datastore *datastore = NULL;
518
519         /* Request a speech object */
520         speech = ast_speech_new(data, ast_channel_nativeformats(chan));
521         if (speech == NULL) {
522                 /* Not available */
523                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
524                 return 0;
525         }
526
527         datastore = ast_datastore_alloc(&speech_datastore, NULL);
528         if (datastore == NULL) {
529                 ast_speech_destroy(speech);
530                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
531                 return 0;
532         }
533         pbx_builtin_setvar_helper(chan, "ERROR", NULL);
534         datastore->data = speech;
535         ast_channel_datastore_add(chan, datastore);
536
537         return 0;
538 }
539
540 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
541 static int speech_load(struct ast_channel *chan, const char *vdata)
542 {
543         int res = 0;
544         struct ast_speech *speech = find_speech(chan);
545         char *data;
546         AST_DECLARE_APP_ARGS(args,
547                 AST_APP_ARG(grammar);
548                 AST_APP_ARG(path);
549         );
550
551         data = ast_strdupa(vdata);
552         AST_STANDARD_APP_ARGS(args, data);
553
554         if (speech == NULL)
555                 return -1;
556
557         if (args.argc != 2)
558                 return -1;
559
560         /* Load the grammar locally on the object */
561         res = ast_speech_grammar_load(speech, args.grammar, args.path);
562
563         return res;
564 }
565
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel whose speech structure is used.
 * \param data Name of the grammar to unload.
 *
 * \return -1 when the channel has no speech structure, otherwise the result of
 *         ast_speech_grammar_unload().
 */
static int speech_unload(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	/* Unload the grammar */
	return ast_speech_grammar_unload(speech, data);
}
580
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel whose speech structure is used.
 * \param data Name of the grammar to deactivate.
 *
 * \return -1 when the channel has no speech structure, otherwise the result of
 *         ast_speech_grammar_deactivate().
 */
static int speech_deactivate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	/* Deactivate the grammar on the speech object */
	return ast_speech_grammar_deactivate(speech, data);
}
595
/*!
 * \brief SpeechActivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel whose speech structure is used.
 * \param data Name of the grammar to activate.
 *
 * \return -1 when the channel has no speech structure, otherwise the result of
 *         ast_speech_grammar_activate().
 */
static int speech_activate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	/* Activate the grammar on the speech object */
	return ast_speech_grammar_activate(speech, data);
}
610
/*!
 * \brief SpeechStart() Dialplan Application
 *
 * \param chan Channel whose speech structure is started.
 * \param data Application arguments (unused).
 *
 * \retval 0  ast_speech_start() was invoked on the channel's speech structure
 * \retval -1 the channel has no speech structure
 */
static int speech_start(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	ast_speech_start(speech);

	return 0;
}
624
625 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
626 static int speech_processing_sound(struct ast_channel *chan, const char *data)
627 {
628         int res = 0;
629         struct ast_speech *speech = find_speech(chan);
630
631         if (speech == NULL)
632                 return -1;
633
634         if (speech->processing_sound != NULL) {
635                 ast_free(speech->processing_sound);
636                 speech->processing_sound = NULL;
637         }
638
639         speech->processing_sound = ast_strdup(data);
640
641         return res;
642 }
643
/*!
 * \brief Helper used by speech_background() to start playback of a sound file.
 *
 * \param chan     Channel to play the file on.
 * \param filename Sound file to open.
 * \param preflang Preferred language handed to ast_openstream().
 *
 * \retval 0  the stream was opened, applied to the channel and playback was
 *            requested (the result of ast_playstream() is not checked)
 * \retval -1 the stream could not be opened or applied to the channel
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
	struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

	if (!stream || ast_applystream(chan, stream)) {
		return -1;
	}

	ast_playstream(stream);

	return 0;
}
659
/*! \brief Option flags understood by the SpeechBackground() application */
enum {
	/*! Set by the 'n' option: do not answer the channel */
	SB_OPT_NOANSWER = 1 << 0,
};
663
664 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
665         AST_APP_OPTION('n', SB_OPT_NOANSWER),
666 END_OPTIONS );
667
668 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
669 static int speech_background(struct ast_channel *chan, const char *data)
670 {
671         unsigned int timeout = 0;
672         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
673         struct ast_speech *speech = find_speech(chan);
674         struct ast_frame *f = NULL;
675         struct ast_format oldreadformat;
676         char dtmf[AST_MAX_EXTENSION] = "";
677         struct timeval start = { 0, 0 }, current;
678         struct ast_datastore *datastore = NULL;
679         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
680         const char *tmp2 = NULL;
681         struct ast_flags options = { 0 };
682         AST_DECLARE_APP_ARGS(args,
683                 AST_APP_ARG(soundfile);
684                 AST_APP_ARG(timeout);
685                 AST_APP_ARG(options);
686         );
687
688         parse = ast_strdupa(data);
689         AST_STANDARD_APP_ARGS(args, parse);
690
691         ast_format_clear(&oldreadformat);
692         if (speech == NULL)
693                 return -1;
694
695         if (!ast_strlen_zero(args.options)) {
696                 char *options_buf = ast_strdupa(args.options);
697                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
698         }
699
700         /* If channel is not already answered, then answer it */
701         if (ast_channel_state(chan) != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
702                 && ast_answer(chan)) {
703                         return -1;
704         }
705
706         /* Record old read format */
707         ast_format_copy(&oldreadformat, ast_channel_readformat(chan));
708
709         /* Change read format to be signed linear */
710         if (ast_set_read_format(chan, &speech->format))
711                 return -1;
712
713         if (!ast_strlen_zero(args.soundfile)) {
714                 /* Yay sound file */
715                 filename_tmp = ast_strdupa(args.soundfile);
716                 if (!ast_strlen_zero(args.timeout)) {
717                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
718                                 timeout = -1;
719                 } else
720                         timeout = 0;
721         }
722
723         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
724         ast_channel_lock(chan);
725         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
726                 max_dtmf_len = atoi(tmp2);
727         }
728         
729         /* See if a terminator is specified */
730         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
731                 if (ast_strlen_zero(tmp2))
732                         dtmf_terminator = '\0';
733                 else
734                         dtmf_terminator = tmp2[0];
735         }
736         ast_channel_unlock(chan);
737
738         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
739         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
740                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
741                 ast_speech_start(speech);
742         }
743
744         /* Ensure no streams are currently running */
745         ast_stopstream(chan);
746
747         /* Okay it's streaming so go into a loop grabbing frames! */
748         while (done == 0) {
749                 /* If the filename is null and stream is not running, start up a new sound file */
750                 if (!quieted && (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) && (filename = strsep(&filename_tmp, "&"))) {
751                         /* Discard old stream information */
752                         ast_stopstream(chan);
753                         /* Start new stream */
754                         speech_streamfile(chan, filename, ast_channel_language(chan));
755                 }
756
757                 /* Run scheduled stuff */
758                 ast_sched_runq(ast_channel_sched(chan));
759
760                 /* Yay scheduling */
761                 res = ast_sched_wait(ast_channel_sched(chan));
762                 if (res < 0)
763                         res = 1000;
764
765                 /* If there is a frame waiting, get it - if not - oh well */
766                 if (ast_waitfor(chan, res) > 0) {
767                         f = ast_read(chan);
768                         if (f == NULL) {
769                                 /* The channel has hung up most likely */
770                                 done = 3;
771                                 break;
772                         }
773                 }
774
775                 /* Do timeout check (shared between audio/dtmf) */
776                 if ((!quieted || strlen(dtmf)) && started == 1) {
777                         current = ast_tvnow();
778                         if ((ast_tvdiff_ms(current, start)) >= timeout) {
779                                 done = 1;
780                                 if (f)
781                                         ast_frfree(f);
782                                 break;
783                         }
784                 }
785
786                 /* Do checks on speech structure to see if it's changed */
787                 ast_mutex_lock(&speech->lock);
788                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
789                         if (ast_channel_stream(chan))
790                                 ast_stopstream(chan);
791                         ast_clear_flag(speech, AST_SPEECH_QUIET);
792                         quieted = 1;
793                 }
794                 /* Check state so we can see what to do */
795                 switch (speech->state) {
796                 case AST_SPEECH_STATE_READY:
797                         /* If audio playback has stopped do a check for timeout purposes */
798                         if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL)
799                                 ast_stopstream(chan);
800                         if (!quieted && ast_channel_stream(chan) == NULL && timeout && started == 0 && !filename_tmp) {
801                                 if (timeout == -1) {
802                                         done = 1;
803                                         if (f)
804                                                 ast_frfree(f);
805                                         break;
806                                 }
807                                 start = ast_tvnow();
808                                 started = 1;
809                         }
810                         /* Write audio frame out to speech engine if no DTMF has been received */
811                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
812                                 ast_speech_write(speech, f->data.ptr, f->datalen);
813                         }
814                         break;
815                 case AST_SPEECH_STATE_WAIT:
816                         /* Cue up waiting sound if not already playing */
817                         if (!strlen(dtmf)) {
818                                 if (ast_channel_stream(chan) == NULL) {
819                                         if (speech->processing_sound != NULL) {
820                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
821                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
822                                                 }
823                                         }
824                                 } else if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) {
825                                         ast_stopstream(chan);
826                                         if (speech->processing_sound != NULL) {
827                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
828                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
829                                                 }
830                                         }
831                                 }
832                         }
833                         break;
834                 case AST_SPEECH_STATE_DONE:
835                         /* Now that we are done... let's switch back to not ready state */
836                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
837                         if (!strlen(dtmf)) {
838                                 /* Copy to speech structure the results, if available */
839                                 speech->results = ast_speech_results_get(speech);
840                                 /* Break out of our background too */
841                                 done = 1;
842                                 /* Stop audio playback */
843                                 if (ast_channel_stream(chan) != NULL) {
844                                         ast_stopstream(chan);
845                                 }
846                         }
847                         break;
848                 default:
849                         break;
850                 }
851                 ast_mutex_unlock(&speech->lock);
852
853                 /* Deal with other frame types */
854                 if (f != NULL) {
855                         /* Free the frame we received */
856                         switch (f->frametype) {
857                         case AST_FRAME_DTMF:
858                                 if (dtmf_terminator != '\0' && f->subclass.integer == dtmf_terminator) {
859                                         done = 1;
860                                 } else {
861                                         quieted = 1;
862                                         if (ast_channel_stream(chan) != NULL) {
863                                                 ast_stopstream(chan);
864                                         }
865                                         if (!started) {
866                                                 /* Change timeout to be 5 seconds for DTMF input */
867                                                 timeout = (ast_channel_pbx(chan) && ast_channel_pbx(chan)->dtimeoutms) ? ast_channel_pbx(chan)->dtimeoutms : 5000;
868                                                 started = 1;
869                                         }
870                                         start = ast_tvnow();
871                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass.integer);
872                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
873                                         /* If the maximum length of the DTMF has been reached, stop now */
874                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
875                                                 done = 1;
876                                 }
877                                 break;
878                         case AST_FRAME_CONTROL:
879                                 switch (f->subclass.integer) {
880                                 case AST_CONTROL_HANGUP:
881                                         /* Since they hung up we should destroy the speech structure */
882                                         done = 3;
883                                 default:
884                                         break;
885                                 }
886                         default:
887                                 break;
888                         }
889                         ast_frfree(f);
890                         f = NULL;
891                 }
892         }
893
894         if (!ast_strlen_zero(dtmf)) {
895                 /* We sort of make a results entry */
896                 speech->results = ast_calloc(1, sizeof(*speech->results));
897                 if (speech->results != NULL) {
898                         ast_speech_dtmf(speech, dtmf);
899                         speech->results->score = 1000;
900                         speech->results->text = ast_strdup(dtmf);
901                         speech->results->grammar = ast_strdup("dtmf");
902                 }
903                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
904         }
905
906         /* See if it was because they hung up */
907         if (done == 3) {
908                 /* Destroy speech structure */
909                 ast_speech_destroy(speech);
910                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
911                 if (datastore != NULL)
912                         ast_channel_datastore_remove(chan, datastore);
913         } else {
914                 /* Channel is okay so restore read format */
915                 ast_set_read_format(chan, &oldreadformat);
916         }
917
918         return 0;
919 }
920
921
922 /*! \brief SpeechDestroy() Dialplan Application */
923 static int speech_destroy(struct ast_channel *chan, const char *data)
924 {
925         int res = 0;
926         struct ast_speech *speech = find_speech(chan);
927         struct ast_datastore *datastore = NULL;
928
929         if (speech == NULL)
930                 return -1;
931
932         /* Destroy speech structure */
933         ast_speech_destroy(speech);
934
935         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
936         if (datastore != NULL) {
937                 ast_channel_datastore_remove(chan, datastore);
938         }
939
940         return res;
941 }
942
943 static int unload_module(void)
944 {
945         int res = 0;
946
947         res = ast_unregister_application("SpeechCreate");
948         res |= ast_unregister_application("SpeechLoadGrammar");
949         res |= ast_unregister_application("SpeechUnloadGrammar");
950         res |= ast_unregister_application("SpeechActivateGrammar");
951         res |= ast_unregister_application("SpeechDeactivateGrammar");
952         res |= ast_unregister_application("SpeechStart");
953         res |= ast_unregister_application("SpeechBackground");
954         res |= ast_unregister_application("SpeechDestroy");
955         res |= ast_unregister_application("SpeechProcessingSound");
956         res |= ast_custom_function_unregister(&speech_function);
957         res |= ast_custom_function_unregister(&speech_score_function);
958         res |= ast_custom_function_unregister(&speech_text_function);
959         res |= ast_custom_function_unregister(&speech_grammar_function);
960         res |= ast_custom_function_unregister(&speech_engine_function);
961         res |= ast_custom_function_unregister(&speech_results_type_function);
962
963         return res;     
964 }
965
966 static int load_module(void)
967 {
968         int res = 0;
969
970         res = ast_register_application_xml("SpeechCreate", speech_create);
971         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
972         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
973         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
974         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
975         res |= ast_register_application_xml("SpeechStart", speech_start);
976         res |= ast_register_application_xml("SpeechBackground", speech_background);
977         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
978         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
979         res |= ast_custom_function_register(&speech_function);
980         res |= ast_custom_function_register(&speech_score_function);
981         res |= ast_custom_function_register(&speech_text_function);
982         res |= ast_custom_function_register(&speech_grammar_function);
983         res |= ast_custom_function_register(&speech_engine_function);
984         res |= ast_custom_function_register(&speech_results_type_function);
985
986         return res;
987 }
988
989 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Dialplan Speech Applications",
990                 .load = load_module,
991                 .unload = unload_module,
992                 .nonoptreq = "res_speech",
993                 );