078fcf2e9a24d67dae45ffc3be895c176f2f26ee
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30  ***/
31
32 #include "asterisk.h"
33
34 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
35
36 #include "asterisk/file.h"
37 #include "asterisk/channel.h"
38 #include "asterisk/pbx.h"
39 #include "asterisk/module.h"
40 #include "asterisk/lock.h"
41 #include "asterisk/app.h"
42 #include "asterisk/speech.h"
43
44 /*** DOCUMENTATION
45         <application name="SpeechCreate" language="en_US">
46                 <synopsis>
47                         Create a Speech Structure.
48                 </synopsis>
49                 <syntax>
50                         <parameter name="engine_name" required="true" />
51                 </syntax>
52                 <description>
53                         <para>This application creates information to be used by all the other applications.
54                         It must be called before doing any speech recognition activities such as activating a grammar.
55                         It takes the engine name to use as the argument; if not specified, the default engine will be used.</para>
56                 </description>
57         </application>
58         <application name="SpeechActivateGrammar" language="en_US">
59                 <synopsis>
60                         Activate a grammar.
61                 </synopsis>
62                 <syntax>
63                         <parameter name="grammar_name" required="true" />
64                 </syntax>
65                 <description>
66                         <para>This activates the specified grammar to be recognized by the engine.
67                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
68                         in the dialplan. The grammar name is the only argument to this application.</para>
69                 </description>
70         </application>
71         <application name="SpeechStart" language="en_US">
72                 <synopsis>
73                         Start recognizing voice in the audio stream.
74                 </synopsis>
75                 <syntax />
76                 <description>
77                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
78                         fed to it.</para>
79                 </description>
80         </application>
81         <application name="SpeechBackground" language="en_US">
82                 <synopsis>
83                         Play a sound file and wait for speech to be recognized.
84                 </synopsis>
85                 <syntax>
86                         <parameter name="sound_file" required="true" />
87                         <parameter name="timeout">
88                                 <para>Timeout integer in seconds. Note the timeout will only start
89                                 once the sound file has stopped playing.</para>
90                         </parameter>
91                         <parameter name="options">
92                                 <optionlist>
93                                         <option name="n">
94                                                 <para>Don't answer the channel if it has not already been answered.</para>
95                                         </option>
96                                 </optionlist>
97                         </parameter>
98                 </syntax>
99                 <description>
100                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
101                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
102                         the speech recognition engine is working. Once results are available the application returns and results
103                         (score and text) are available using dialplan functions.</para>
104                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)}
105                         and ${SPEECH_SCORE(1)}.</para>
106                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
107                         
108                 </description>
109         </application>
110         <application name="SpeechDeactivateGrammar" language="en_US">
111                 <synopsis>
112                         Deactivate a grammar.
113                 </synopsis>
114                 <syntax>
115                         <parameter name="grammar_name" required="true">
116                                 <para>The grammar name to deactivate</para>
117                         </parameter>
118                 </syntax>
119                 <description>
120                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
121                 </description>
122         </application>
123         <application name="SpeechProcessingSound" language="en_US">
124                 <synopsis>
125                         Change background processing sound.
126                 </synopsis>
127                 <syntax>
128                         <parameter name="sound_file" required="true" />
129                 </syntax>
130                 <description>
131                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
132                         processing and working to get results.</para>
133                 </description>
134         </application>
135         <application name="SpeechDestroy" language="en_US">
136                 <synopsis>
137                         End speech recognition.
138                 </synopsis>
139                 <syntax />
140                 <description>
141                         <para>This destroys the information used by all the other speech recognition applications.
142                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
143                         again before calling any other application.</para>
144                 </description>
145         </application>
146         <application name="SpeechLoadGrammar" language="en_US">
147                 <synopsis>
148                         Load a grammar.
149                 </synopsis>
150                 <syntax>
151                         <parameter name="grammar_name" required="true" />
152                         <parameter name="path" required="true" />
153                 </syntax>
154                 <description>
155                         <para>Load a grammar only on the channel, not globally.</para>
156                 </description>
157         </application>
158         <application name="SpeechUnloadGrammar" language="en_US">
159                 <synopsis>
160                         Unload a grammar.
161                 </synopsis>
162                 <syntax>
163                         <parameter name="grammar_name" required="true" />
164                 </syntax>
165                 <description>
166                         <para>Unload a grammar.</para>
167                 </description>
168         </application>
169         <function name="SPEECH_SCORE" language="en_US">
170                 <synopsis>
171                         Gets the confidence score of a result.
172                 </synopsis>
173                 <syntax argsep="/">
174                         <parameter name="nbest_number" />
175                         <parameter name="result_number" required="true" />
176                 </syntax>
177                 <description>
178                         <para>Gets the confidence score of a result.</para>
179                 </description>
180         </function>
181         <function name="SPEECH_TEXT" language="en_US">
182                 <synopsis>
183                         Gets the recognized text of a result.
184                 </synopsis>
185                 <syntax argsep="/">
186                         <parameter name="nbest_number" />
187                         <parameter name="result_number" required="true" />
188                 </syntax>
189                 <description>
190                         <para>Gets the recognized text of a result.</para>
191                 </description>
192         </function>
193         <function name="SPEECH_GRAMMAR" language="en_US">
194                 <synopsis>
195                         Gets the matched grammar of a result if available.
196                 </synopsis>
197                 <syntax argsep="/">
198                         <parameter name="nbest_number" />
199                         <parameter name="result_number" required="true" />
200                 </syntax>
201                 <description>
202                         <para>Gets the matched grammar of a result if available.</para>
203                 </description>
204         </function>
205         <function name="SPEECH_ENGINE" language="en_US">
206                 <synopsis>
207                         Change a speech engine specific attribute.
208                 </synopsis>
209                 <syntax>
210                         <parameter name="name" required="true" />
211                 </syntax>
212                 <description>
213                         <para>Changes a speech engine specific attribute.</para>
214                 </description>
215         </function>
216         <function name="SPEECH_RESULTS_TYPE" language="en_US">
217                 <synopsis>
218                         Sets the type of results that will be returned.
219                 </synopsis>
220                 <syntax />
221                 <description>
222                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
223                 </description>
224         </function>
225         <function name="SPEECH" language="en_US">
226                 <synopsis>
227                         Gets information about speech recognition results.
228                 </synopsis>
229                 <syntax>
230                         <parameter name="argument" required="true">
231                                 <enumlist>
232                                         <enum name="status">
233                                                 <para>Returns <literal>1</literal> upon speech object existing,
234                                                 or <literal>0</literal> if not</para>
235                                         </enum>
236                                         <enum name="spoke">
237                                                 <para>Returns <literal>1</literal> if the speaker spoke,
238                                                 or <literal>0</literal> if not</para>
239                                         </enum>
240                                         <enum name="results">
241                                                 <para>Returns number of results that were recognized.</para>
242                                         </enum>
243                                 </enumlist>
244                         </parameter>
245                 </syntax>
246                 <description>
247                         <para>Gets information about speech recognition results.</para>
248                 </description>
249         </function>
250  ***/
251
/*!
 * \brief Datastore destroy hook that releases the attached speech structure
 *
 * \param data Datastore payload, interpreted as a struct ast_speech pointer.
 *             A NULL payload is ignored.
 */
static void destroy_callback(void *data)
{
	struct ast_speech *victim = data;

	if (victim) {
		/* Hand the structure back to the speech core for deallocation */
		ast_speech_destroy(victim);
	}
}
266
267 /*! \brief Static structure for datastore information */
268 static const struct ast_datastore_info speech_datastore = {
269         .type = "speech",
270         .destroy = destroy_callback
271 };
272
273 /*! \brief Helper function used to find the speech structure attached to a channel */
274 static struct ast_speech *find_speech(struct ast_channel *chan)
275 {
276         struct ast_speech *speech = NULL;
277         struct ast_datastore *datastore = NULL;
278         
279         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
280         if (datastore == NULL) {
281                 return NULL;
282         }
283         speech = datastore->data;
284
285         return speech;
286 }
287
288 /* Helper function to find a specific speech recognition result by number and nbest alternative */
289 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
290 {
291         struct ast_speech_result *result = results;
292         char *tmp = NULL;
293         int nbest_num = 0, wanted_num = 0, i = 0;
294
295         if (!result) {
296                 return NULL;
297         }
298
299         if ((tmp = strchr(result_num, '/'))) {
300                 *tmp++ = '\0';
301                 nbest_num = atoi(result_num);
302                 wanted_num = atoi(tmp);
303         } else {
304                 wanted_num = atoi(result_num);
305         }
306
307         do {
308                 if (result->nbest_num != nbest_num)
309                         continue;
310                 if (i == wanted_num)
311                         break;
312                 i++;
313         } while ((result = AST_LIST_NEXT(result, list)));
314
315         return result;
316 }
317
318 /*! \brief SPEECH_SCORE() Dialplan Function */
319 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
320                        char *buf, size_t len)
321 {
322         struct ast_speech_result *result = NULL;
323         struct ast_speech *speech = find_speech(chan);
324         char tmp[128] = "";
325
326         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
327                 return -1;
328         }
329         
330         snprintf(tmp, sizeof(tmp), "%d", result->score);
331         
332         ast_copy_string(buf, tmp, len);
333
334         return 0;
335 }
336
337 static struct ast_custom_function speech_score_function = {
338         .name = "SPEECH_SCORE",
339         .read = speech_score,
340         .write = NULL,
341 };
342
343 /*! \brief SPEECH_TEXT() Dialplan Function */
344 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
345                         char *buf, size_t len)
346 {
347         struct ast_speech_result *result = NULL;
348         struct ast_speech *speech = find_speech(chan);
349
350         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
351                 return -1;
352         }
353
354         if (result->text != NULL) {
355                 ast_copy_string(buf, result->text, len);
356         } else {
357                 buf[0] = '\0';
358         }
359
360         return 0;
361 }
362
363 static struct ast_custom_function speech_text_function = {
364         .name = "SPEECH_TEXT",
365         .read = speech_text,
366         .write = NULL,
367 };
368
369 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
370 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
371                         char *buf, size_t len)
372 {
373         struct ast_speech_result *result = NULL;
374         struct ast_speech *speech = find_speech(chan);
375
376         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
377                 return -1;
378         }
379
380         if (result->grammar != NULL) {
381                 ast_copy_string(buf, result->grammar, len);
382         } else {
383                 buf[0] = '\0';
384         }
385
386         return 0;
387 }
388
389 static struct ast_custom_function speech_grammar_function = {
390         .name = "SPEECH_GRAMMAR",
391         .read = speech_grammar,
392         .write = NULL,
393 };
394
395 /*! \brief SPEECH_ENGINE() Dialplan Function */
396 static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
397 {
398         struct ast_speech *speech = find_speech(chan);
399
400         if (data == NULL || speech == NULL) {
401                 return -1;
402         }
403
404         ast_speech_change(speech, data, value);
405
406         return 0;
407 }
408
409 static struct ast_custom_function speech_engine_function = {
410         .name = "SPEECH_ENGINE",
411         .read = NULL,
412         .write = speech_engine_write,
413 };
414
415 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
416 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
417 {
418         struct ast_speech *speech = find_speech(chan);
419
420         if (data == NULL || speech == NULL)
421                 return -1;
422
423         if (!strcasecmp(value, "normal"))
424                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
425         else if (!strcasecmp(value, "nbest"))
426                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
427
428         return 0;
429 }
430
431 static struct ast_custom_function speech_results_type_function = {
432         .name = "SPEECH_RESULTS_TYPE",
433         .read = NULL,
434         .write = speech_results_type_write,
435 };
436
437 /*! \brief SPEECH() Dialplan Function */
438 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
439                         char *buf, size_t len)
440 {
441         int results = 0;
442         struct ast_speech_result *result = NULL;
443         struct ast_speech *speech = find_speech(chan);
444         char tmp[128] = "";
445
446         /* Now go for the various options */
447         if (!strcasecmp(data, "status")) {
448                 if (speech != NULL)
449                         ast_copy_string(buf, "1", len);
450                 else
451                         ast_copy_string(buf, "0", len);
452                 return 0;
453         }
454
455         /* Make sure we have a speech structure for everything else */
456         if (speech == NULL) {
457                 return -1;
458         }
459
460         /* Check to see if they are checking for silence */
461         if (!strcasecmp(data, "spoke")) {
462                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
463                         ast_copy_string(buf, "1", len);
464                 else
465                         ast_copy_string(buf, "0", len);
466         } else if (!strcasecmp(data, "results")) {
467                 /* Count number of results */
468                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
469                         results++;
470                 snprintf(tmp, sizeof(tmp), "%d", results);
471                 ast_copy_string(buf, tmp, len);
472         } else {
473                 buf[0] = '\0';
474         }
475
476         return 0;
477 }
478
479 static struct ast_custom_function speech_function = {
480         .name = "SPEECH",
481         .read = speech_read,
482         .write = NULL,
483 };
484
485
486
487 /*! \brief SpeechCreate() Dialplan Application */
488 static int speech_create(struct ast_channel *chan, const char *data)
489 {
490         struct ast_speech *speech = NULL;
491         struct ast_datastore *datastore = NULL;
492
493         /* Request a speech object */
494         speech = ast_speech_new(data, chan->nativeformats);
495         if (speech == NULL) {
496                 /* Not available */
497                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
498                 return 0;
499         }
500
501         datastore = ast_datastore_alloc(&speech_datastore, NULL);
502         if (datastore == NULL) {
503                 ast_speech_destroy(speech);
504                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
505                 return 0;
506         }
507         pbx_builtin_setvar_helper(chan, "ERROR", NULL);
508         datastore->data = speech;
509         ast_channel_datastore_add(chan, datastore);
510
511         return 0;
512 }
513
514 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
515 static int speech_load(struct ast_channel *chan, const char *vdata)
516 {
517         int res = 0;
518         struct ast_speech *speech = find_speech(chan);
519         char *data;
520         AST_DECLARE_APP_ARGS(args,
521                 AST_APP_ARG(grammar);
522                 AST_APP_ARG(path);
523         );
524
525         data = ast_strdupa(vdata);
526         AST_STANDARD_APP_ARGS(args, data);
527
528         if (speech == NULL)
529                 return -1;
530
531         if (args.argc != 2)
532                 return -1;
533
534         /* Load the grammar locally on the object */
535         res = ast_speech_grammar_load(speech, args.grammar, args.path);
536
537         return res;
538 }
539
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to unload
 *
 * \return The result of ast_speech_grammar_unload(), or -1 when the channel
 *         has no speech structure.
 */
static int speech_unload(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_unload(speech, data);
}
554
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to deactivate
 *
 * \return The result of ast_speech_grammar_deactivate(), or -1 when the
 *         channel has no speech structure.
 */
static int speech_deactivate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_deactivate(speech, data);
}
569
/*!
 * \brief SpeechActivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to activate
 *
 * \return The result of ast_speech_grammar_activate(), or -1 when the channel
 *         has no speech structure.
 */
static int speech_activate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_activate(speech, data);
}
584
/*!
 * \brief SpeechStart() Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Unused
 *
 * \retval 0  ast_speech_start() was invoked (its result is not inspected)
 * \retval -1 the channel has no speech structure
 */
static int speech_start(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	ast_speech_start(speech);
	return 0;
}
598
599 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
600 static int speech_processing_sound(struct ast_channel *chan, const char *data)
601 {
602         int res = 0;
603         struct ast_speech *speech = find_speech(chan);
604
605         if (speech == NULL)
606                 return -1;
607
608         if (speech->processing_sound != NULL) {
609                 ast_free(speech->processing_sound);
610                 speech->processing_sound = NULL;
611         }
612
613         speech->processing_sound = ast_strdup(data);
614
615         return res;
616 }
617
/*!
 * \brief Open a sound file on a channel and start playing it
 *
 * \param chan     Channel to play on
 * \param filename Sound file to open
 * \param preflang Preferred language passed to ast_openstream()
 *
 * \retval 0  the stream was opened, applied to the channel and playback was started
 * \retval -1 ast_openstream() returned NULL or ast_applystream() reported failure
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
	struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

	/* ast_applystream() is only attempted once a stream has been opened */
	if (stream == NULL || ast_applystream(chan, stream)) {
		return -1;
	}

	ast_playstream(stream);
	return 0;
}
633
634 enum {
635         SB_OPT_NOANSWER = (1 << 0),
636 };
637
638 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
639         AST_APP_OPTION('n', SB_OPT_NOANSWER),
640 END_OPTIONS );
641
642 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
643 static int speech_background(struct ast_channel *chan, const char *data)
644 {
645         unsigned int timeout = 0;
646         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
647         struct ast_speech *speech = find_speech(chan);
648         struct ast_frame *f = NULL;
649         struct ast_format oldreadformat;
650         char dtmf[AST_MAX_EXTENSION] = "";
651         struct timeval start = { 0, 0 }, current;
652         struct ast_datastore *datastore = NULL;
653         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
654         const char *tmp2 = NULL;
655         struct ast_flags options = { 0 };
656         AST_DECLARE_APP_ARGS(args,
657                 AST_APP_ARG(soundfile);
658                 AST_APP_ARG(timeout);
659                 AST_APP_ARG(options);
660         );
661
662         parse = ast_strdupa(data);
663         AST_STANDARD_APP_ARGS(args, parse);
664
665         ast_format_clear(&oldreadformat);
666         if (speech == NULL)
667                 return -1;
668
669         if (!ast_strlen_zero(args.options)) {
670                 char *options_buf = ast_strdupa(args.options);
671                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
672         }
673
674         /* If channel is not already answered, then answer it */
675         if (chan->_state != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
676                 && ast_answer(chan)) {
677                         return -1;
678         }
679
680         /* Record old read format */
681         ast_format_copy(&oldreadformat, &chan->readformat);
682
683         /* Change read format to be signed linear */
684         if (ast_set_read_format(chan, &speech->format))
685                 return -1;
686
687         if (!ast_strlen_zero(args.soundfile)) {
688                 /* Yay sound file */
689                 filename_tmp = ast_strdupa(args.soundfile);
690                 if (!ast_strlen_zero(args.timeout)) {
691                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
692                                 timeout = -1;
693                 } else
694                         timeout = 0;
695         }
696
697         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
698         ast_channel_lock(chan);
699         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
700                 max_dtmf_len = atoi(tmp2);
701         }
702         
703         /* See if a terminator is specified */
704         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
705                 if (ast_strlen_zero(tmp2))
706                         dtmf_terminator = '\0';
707                 else
708                         dtmf_terminator = tmp2[0];
709         }
710         ast_channel_unlock(chan);
711
712         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
713         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
714                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
715                 ast_speech_start(speech);
716         }
717
718         /* Ensure no streams are currently running */
719         ast_stopstream(chan);
720
721         /* Okay it's streaming so go into a loop grabbing frames! */
722         while (done == 0) {
723                 /* If the filename is null and stream is not running, start up a new sound file */
724                 if (!quieted && (chan->streamid == -1 && chan->timingfunc == NULL) && (filename = strsep(&filename_tmp, "&"))) {
725                         /* Discard old stream information */
726                         ast_stopstream(chan);
727                         /* Start new stream */
728                         speech_streamfile(chan, filename, ast_channel_language(chan));
729                 }
730
731                 /* Run scheduled stuff */
732                 ast_sched_runq(chan->sched);
733
734                 /* Yay scheduling */
735                 res = ast_sched_wait(chan->sched);
736                 if (res < 0)
737                         res = 1000;
738
739                 /* If there is a frame waiting, get it - if not - oh well */
740                 if (ast_waitfor(chan, res) > 0) {
741                         f = ast_read(chan);
742                         if (f == NULL) {
743                                 /* The channel has hung up most likely */
744                                 done = 3;
745                                 break;
746                         }
747                 }
748
749                 /* Do timeout check (shared between audio/dtmf) */
750                 if ((!quieted || strlen(dtmf)) && started == 1) {
751                         current = ast_tvnow();
752                         if ((ast_tvdiff_ms(current, start)) >= timeout) {
753                                 done = 1;
754                                 if (f)
755                                         ast_frfree(f);
756                                 break;
757                         }
758                 }
759
760                 /* Do checks on speech structure to see if it's changed */
761                 ast_mutex_lock(&speech->lock);
762                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
763                         if (chan->stream)
764                                 ast_stopstream(chan);
765                         ast_clear_flag(speech, AST_SPEECH_QUIET);
766                         quieted = 1;
767                 }
768                 /* Check state so we can see what to do */
769                 switch (speech->state) {
770                 case AST_SPEECH_STATE_READY:
771                         /* If audio playback has stopped do a check for timeout purposes */
772                         if (chan->streamid == -1 && chan->timingfunc == NULL)
773                                 ast_stopstream(chan);
774                         if (!quieted && chan->stream == NULL && timeout && started == 0 && !filename_tmp) {
775                                 if (timeout == -1) {
776                                         done = 1;
777                                         if (f)
778                                                 ast_frfree(f);
779                                         break;
780                                 }
781                                 start = ast_tvnow();
782                                 started = 1;
783                         }
784                         /* Write audio frame out to speech engine if no DTMF has been received */
785                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
786                                 ast_speech_write(speech, f->data.ptr, f->datalen);
787                         }
788                         break;
789                 case AST_SPEECH_STATE_WAIT:
790                         /* Cue up waiting sound if not already playing */
791                         if (!strlen(dtmf)) {
792                                 if (chan->stream == NULL) {
793                                         if (speech->processing_sound != NULL) {
794                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
795                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
796                                                 }
797                                         }
798                                 } else if (chan->streamid == -1 && chan->timingfunc == NULL) {
799                                         ast_stopstream(chan);
800                                         if (speech->processing_sound != NULL) {
801                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
802                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
803                                                 }
804                                         }
805                                 }
806                         }
807                         break;
808                 case AST_SPEECH_STATE_DONE:
809                         /* Now that we are done... let's switch back to not ready state */
810                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
811                         if (!strlen(dtmf)) {
812                                 /* Copy to speech structure the results, if available */
813                                 speech->results = ast_speech_results_get(speech);
814                                 /* Break out of our background too */
815                                 done = 1;
816                                 /* Stop audio playback */
817                                 if (chan->stream != NULL) {
818                                         ast_stopstream(chan);
819                                 }
820                         }
821                         break;
822                 default:
823                         break;
824                 }
825                 ast_mutex_unlock(&speech->lock);
826
827                 /* Deal with other frame types */
828                 if (f != NULL) {
829                         /* Free the frame we received */
830                         switch (f->frametype) {
831                         case AST_FRAME_DTMF:
832                                 if (dtmf_terminator != '\0' && f->subclass.integer == dtmf_terminator) {
833                                         done = 1;
834                                 } else {
835                                         quieted = 1;
836                                         if (chan->stream != NULL) {
837                                                 ast_stopstream(chan);
838                                         }
839                                         if (!started) {
840                                                 /* Change timeout to be 5 seconds for DTMF input */
841                                                 timeout = (chan->pbx && chan->pbx->dtimeoutms) ? chan->pbx->dtimeoutms : 5000;
842                                                 started = 1;
843                                         }
844                                         start = ast_tvnow();
845                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass.integer);
846                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
847                                         /* If the maximum length of the DTMF has been reached, stop now */
848                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
849                                                 done = 1;
850                                 }
851                                 break;
852                         case AST_FRAME_CONTROL:
853                                 switch (f->subclass.integer) {
854                                 case AST_CONTROL_HANGUP:
855                                         /* Since they hung up we should destroy the speech structure */
856                                         done = 3;
857                                 default:
858                                         break;
859                                 }
860                         default:
861                                 break;
862                         }
863                         ast_frfree(f);
864                         f = NULL;
865                 }
866         }
867
868         if (!ast_strlen_zero(dtmf)) {
869                 /* We sort of make a results entry */
870                 speech->results = ast_calloc(1, sizeof(*speech->results));
871                 if (speech->results != NULL) {
872                         ast_speech_dtmf(speech, dtmf);
873                         speech->results->score = 1000;
874                         speech->results->text = ast_strdup(dtmf);
875                         speech->results->grammar = ast_strdup("dtmf");
876                 }
877                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
878         }
879
880         /* See if it was because they hung up */
881         if (done == 3) {
882                 /* Destroy speech structure */
883                 ast_speech_destroy(speech);
884                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
885                 if (datastore != NULL)
886                         ast_channel_datastore_remove(chan, datastore);
887         } else {
888                 /* Channel is okay so restore read format */
889                 ast_set_read_format(chan, &oldreadformat);
890         }
891
892         return 0;
893 }
894
895
896 /*! \brief SpeechDestroy() Dialplan Application */
897 static int speech_destroy(struct ast_channel *chan, const char *data)
898 {
899         int res = 0;
900         struct ast_speech *speech = find_speech(chan);
901         struct ast_datastore *datastore = NULL;
902
903         if (speech == NULL)
904                 return -1;
905
906         /* Destroy speech structure */
907         ast_speech_destroy(speech);
908
909         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
910         if (datastore != NULL) {
911                 ast_channel_datastore_remove(chan, datastore);
912         }
913
914         return res;
915 }
916
917 static int unload_module(void)
918 {
919         int res = 0;
920
921         res = ast_unregister_application("SpeechCreate");
922         res |= ast_unregister_application("SpeechLoadGrammar");
923         res |= ast_unregister_application("SpeechUnloadGrammar");
924         res |= ast_unregister_application("SpeechActivateGrammar");
925         res |= ast_unregister_application("SpeechDeactivateGrammar");
926         res |= ast_unregister_application("SpeechStart");
927         res |= ast_unregister_application("SpeechBackground");
928         res |= ast_unregister_application("SpeechDestroy");
929         res |= ast_unregister_application("SpeechProcessingSound");
930         res |= ast_custom_function_unregister(&speech_function);
931         res |= ast_custom_function_unregister(&speech_score_function);
932         res |= ast_custom_function_unregister(&speech_text_function);
933         res |= ast_custom_function_unregister(&speech_grammar_function);
934         res |= ast_custom_function_unregister(&speech_engine_function);
935         res |= ast_custom_function_unregister(&speech_results_type_function);
936
937         return res;     
938 }
939
940 static int load_module(void)
941 {
942         int res = 0;
943
944         res = ast_register_application_xml("SpeechCreate", speech_create);
945         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
946         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
947         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
948         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
949         res |= ast_register_application_xml("SpeechStart", speech_start);
950         res |= ast_register_application_xml("SpeechBackground", speech_background);
951         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
952         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
953         res |= ast_custom_function_register(&speech_function);
954         res |= ast_custom_function_register(&speech_score_function);
955         res |= ast_custom_function_register(&speech_text_function);
956         res |= ast_custom_function_register(&speech_grammar_function);
957         res |= ast_custom_function_register(&speech_engine_function);
958         res |= ast_custom_function_register(&speech_results_type_function);
959
960         return res;
961 }
962
/*
 * Module descriptor for "Dialplan Speech Applications".
 *
 * Wires load_module() and unload_module() in as the module's load and
 * unload callbacks.
 *
 * NOTE(review): .nonoptreq = "res_speech" presumably declares that
 * res_speech must be loaded before this module -- confirm against the
 * module loader documentation.
 */
963 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Dialplan Speech Applications",
964                 .load = load_module,
965                 .unload = unload_module,
966                 .nonoptreq = "res_speech",
967                 );