670efa2bf7afc617b8a517790a42e4ed45c7b146
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 #include "asterisk.h"
29
30 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
31
32 #include "asterisk/file.h"
33 #include "asterisk/channel.h"
34 #include "asterisk/pbx.h"
35 #include "asterisk/module.h"
36 #include "asterisk/lock.h"
37 #include "asterisk/app.h"
38 #include "asterisk/speech.h"
39
40 /*** DOCUMENTATION
41         <application name="SpeechCreate" language="en_US">
42                 <synopsis>
43                         Create a Speech Structure.
44                 </synopsis>
45                 <syntax>
46                         <parameter name="engine_name" required="true" />
47                 </syntax>
48                 <description>
49                         <para>This application creates information to be used by all the other applications.
50                         It must be called before doing any speech recognition activities such as activating a grammar.
51                         It takes the engine name to use as the argument; if not specified, the default engine will be used.</para>
52                 </description>
53         </application>
54         <application name="SpeechActivateGrammar" language="en_US">
55                 <synopsis>
56                         Activate a grammar.
57                 </synopsis>
58                 <syntax>
59                         <parameter name="grammar_name" required="true" />
60                 </syntax>
61                 <description>
62                         <para>This activates the specified grammar to be recognized by the engine.
63                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
64                         in the dialplan. The grammar name is the only argument to this application.</para>
65                 </description>
66         </application>
67         <application name="SpeechStart" language="en_US">
68                 <synopsis>
69                         Start recognizing voice in the audio stream.
70                 </synopsis>
71                 <syntax />
72                 <description>
73                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
74                         fed to it.</para>
75                 </description>
76         </application>
77         <application name="SpeechBackground" language="en_US">
78                 <synopsis>
79                         Play a sound file and wait for speech to be recognized.
80                 </synopsis>
81                 <syntax>
82                         <parameter name="sound_file" required="true" />
83                         <parameter name="timeout">
84                                 <para>Timeout integer in seconds. Note the timeout will only start
85                                 once the sound file has stopped playing.</para>
86                         </parameter>
87                         <parameter name="options">
88                                 <optionlist>
89                                         <option name="n">
90                                                 <para>Don't answer the channel if it has not already been answered.</para>
91                                         </option>
92                                 </optionlist>
93                         </parameter>
94                 </syntax>
95                 <description>
96                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
97                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
98                         the speech recognition engine is working. Once results are available the application returns and results
99                         (score and text) are available using dialplan functions.</para>
100                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)}, while the second are ${SPEECH_TEXT(1)}
101                         and ${SPEECH_SCORE(1)}.</para>
102                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
103                         
104                 </description>
105         </application>
106         <application name="SpeechDeactivateGrammar" language="en_US">
107                 <synopsis>
108                         Deactivate a grammar.
109                 </synopsis>
110                 <syntax>
111                         <parameter name="grammar_name" required="true">
112                                 <para>The grammar name to deactivate</para>
113                         </parameter>
114                 </syntax>
115                 <description>
116                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
117                 </description>
118         </application>
119         <application name="SpeechProcessingSound" language="en_US">
120                 <synopsis>
121                         Change background processing sound.
122                 </synopsis>
123                 <syntax>
124                         <parameter name="sound_file" required="true" />
125                 </syntax>
126                 <description>
127                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
128                         processing and working to get results.</para>
129                 </description>
130         </application>
131         <application name="SpeechDestroy" language="en_US">
132                 <synopsis>
133                         End speech recognition.
134                 </synopsis>
135                 <syntax />
136                 <description>
137                         <para>This destroys the information used by all the other speech recognition applications.
138                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
139                         again before calling any other application.</para>
140                 </description>
141         </application>
142         <application name="SpeechLoadGrammar" language="en_US">
143                 <synopsis>
144                         Load a grammar.
145                 </synopsis>
146                 <syntax>
147                         <parameter name="grammar_name" required="true" />
148                         <parameter name="path" required="true" />
149                 </syntax>
150                 <description>
151                         <para>Load a grammar only on the channel, not globally.</para>
152                 </description>
153         </application>
154         <application name="SpeechUnloadGrammar" language="en_US">
155                 <synopsis>
156                         Unload a grammar.
157                 </synopsis>
158                 <syntax>
159                         <parameter name="grammar_name" required="true" />
160                 </syntax>
161                 <description>
162                         <para>Unload a grammar.</para>
163                 </description>
164         </application>
165         <function name="SPEECH_SCORE" language="en_US">
166                 <synopsis>
167                         Gets the confidence score of a result.
168                 </synopsis>
169                 <syntax argsep="/">
170                         <parameter name="nbest_number" />
171                         <parameter name="result_number" required="true" />
172                 </syntax>
173                 <description>
174                         <para>Gets the confidence score of a result.</para>
175                 </description>
176         </function>
177         <function name="SPEECH_TEXT" language="en_US">
178                 <synopsis>
179                         Gets the recognized text of a result.
180                 </synopsis>
181                 <syntax argsep="/">
182                         <parameter name="nbest_number" />
183                         <parameter name="result_number" required="true" />
184                 </syntax>
185                 <description>
186                         <para>Gets the recognized text of a result.</para>
187                 </description>
188         </function>
189         <function name="SPEECH_GRAMMAR" language="en_US">
190                 <synopsis>
191                         Gets the matched grammar of a result if available.
192                 </synopsis>
193                 <syntax argsep="/">
194                         <parameter name="nbest_number" />
195                         <parameter name="result_number" required="true" />
196                 </syntax>
197                 <description>
198                         <para>Gets the matched grammar of a result if available.</para>
199                 </description>
200         </function>
201         <function name="SPEECH_ENGINE" language="en_US">
202                 <synopsis>
203                         Change a speech engine specific attribute.
204                 </synopsis>
205                 <syntax>
206                         <parameter name="name" required="true" />
207                 </syntax>
208                 <description>
209                         <para>Changes a speech engine specific attribute.</para>
210                 </description>
211         </function>
212         <function name="SPEECH_RESULTS_TYPE" language="en_US">
213                 <synopsis>
214                         Sets the type of results that will be returned.
215                 </synopsis>
216                 <syntax />
217                 <description>
218                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
219                 </description>
220         </function>
221         <function name="SPEECH" language="en_US">
222                 <synopsis>
223                         Gets information about speech recognition results.
224                 </synopsis>
225                 <syntax>
226                         <parameter name="argument" required="true">
227                                 <enumlist>
228                                         <enum name="status">
229                                                 <para>Returns <literal>1</literal> upon speech object existing,
230                                                 or <literal>0</literal> if not</para>
231                                         </enum>
232                                         <enum name="spoke">
233                                                 <para>Returns <literal>1</literal> if the speaker spoke,
234                                                 or <literal>0</literal> if not</para>
235                                         </enum>
236                                         <enum name="results">
237                                                 <para>Returns number of results that were recognized.</para>
238                                         </enum>
239                                 </enumlist>
240                         </parameter>
241                 </syntax>
242                 <description>
243                         <para>Gets information about speech recognition results.</para>
244                 </description>
245         </function>
246  ***/
247
/*!
 * \brief Datastore destroy hook that releases the attached speech structure
 *
 * \param data Datastore payload, interpreted as a struct ast_speech pointer
 *
 * A NULL payload is ignored; anything else is handed to ast_speech_destroy().
 */
static void destroy_callback(void *data)
{
	struct ast_speech *doomed = data;

	if (doomed) {
		ast_speech_destroy(doomed);
	}
}
262
263 /*! \brief Static structure for datastore information */
264 static const struct ast_datastore_info speech_datastore = {
265         .type = "speech",
266         .destroy = destroy_callback
267 };
268
269 /*! \brief Helper function used to find the speech structure attached to a channel */
270 static struct ast_speech *find_speech(struct ast_channel *chan)
271 {
272         struct ast_speech *speech = NULL;
273         struct ast_datastore *datastore = NULL;
274         
275         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
276         if (datastore == NULL) {
277                 return NULL;
278         }
279         speech = datastore->data;
280
281         return speech;
282 }
283
284 /* Helper function to find a specific speech recognition result by number and nbest alternative */
285 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
286 {
287         struct ast_speech_result *result = results;
288         char *tmp = NULL;
289         int nbest_num = 0, wanted_num = 0, i = 0;
290
291         if (!result) {
292                 return NULL;
293         }
294
295         if ((tmp = strchr(result_num, '/'))) {
296                 *tmp++ = '\0';
297                 nbest_num = atoi(result_num);
298                 wanted_num = atoi(tmp);
299         } else {
300                 wanted_num = atoi(result_num);
301         }
302
303         do {
304                 if (result->nbest_num != nbest_num)
305                         continue;
306                 if (i == wanted_num)
307                         break;
308                 i++;
309         } while ((result = AST_LIST_NEXT(result, list)));
310
311         return result;
312 }
313
314 /*! \brief SPEECH_SCORE() Dialplan Function */
315 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
316                        char *buf, size_t len)
317 {
318         struct ast_speech_result *result = NULL;
319         struct ast_speech *speech = find_speech(chan);
320         char tmp[128] = "";
321
322         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
323                 return -1;
324         }
325         
326         snprintf(tmp, sizeof(tmp), "%d", result->score);
327         
328         ast_copy_string(buf, tmp, len);
329
330         return 0;
331 }
332
333 static struct ast_custom_function speech_score_function = {
334         .name = "SPEECH_SCORE",
335         .read = speech_score,
336         .write = NULL,
337 };
338
339 /*! \brief SPEECH_TEXT() Dialplan Function */
340 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
341                         char *buf, size_t len)
342 {
343         struct ast_speech_result *result = NULL;
344         struct ast_speech *speech = find_speech(chan);
345
346         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
347                 return -1;
348         }
349
350         if (result->text != NULL) {
351                 ast_copy_string(buf, result->text, len);
352         } else {
353                 buf[0] = '\0';
354         }
355
356         return 0;
357 }
358
359 static struct ast_custom_function speech_text_function = {
360         .name = "SPEECH_TEXT",
361         .read = speech_text,
362         .write = NULL,
363 };
364
365 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
366 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
367                         char *buf, size_t len)
368 {
369         struct ast_speech_result *result = NULL;
370         struct ast_speech *speech = find_speech(chan);
371
372         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
373                 return -1;
374         }
375
376         if (result->grammar != NULL) {
377                 ast_copy_string(buf, result->grammar, len);
378         } else {
379                 buf[0] = '\0';
380         }
381
382         return 0;
383 }
384
385 static struct ast_custom_function speech_grammar_function = {
386         .name = "SPEECH_GRAMMAR",
387         .read = speech_grammar,
388         .write = NULL,
389 };
390
/*!
 * \brief SPEECH_ENGINE() dialplan function write handler
 *
 * \param chan  Channel carrying the speech structure
 * \param cmd   Function name (unused)
 * \param data  Name of the engine attribute to change
 * \param value New value for that attribute
 *
 * \retval 0  the change was forwarded via ast_speech_change() (its result is not checked)
 * \retval -1 no attribute name given or no speech structure on the channel
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
	struct ast_speech *speech = find_speech(chan);

	if (!data || !speech) {
		return -1;
	}

	ast_speech_change(speech, data, value);

	return 0;
}
404
405 static struct ast_custom_function speech_engine_function = {
406         .name = "SPEECH_ENGINE",
407         .read = NULL,
408         .write = speech_engine_write,
409 };
410
411 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
412 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
413 {
414         struct ast_speech *speech = find_speech(chan);
415
416         if (data == NULL || speech == NULL)
417                 return -1;
418
419         if (!strcasecmp(value, "normal"))
420                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
421         else if (!strcasecmp(value, "nbest"))
422                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
423
424         return 0;
425 }
426
427 static struct ast_custom_function speech_results_type_function = {
428         .name = "SPEECH_RESULTS_TYPE",
429         .read = NULL,
430         .write = speech_results_type_write,
431 };
432
433 /*! \brief SPEECH() Dialplan Function */
434 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
435                         char *buf, size_t len)
436 {
437         int results = 0;
438         struct ast_speech_result *result = NULL;
439         struct ast_speech *speech = find_speech(chan);
440         char tmp[128] = "";
441
442         /* Now go for the various options */
443         if (!strcasecmp(data, "status")) {
444                 if (speech != NULL)
445                         ast_copy_string(buf, "1", len);
446                 else
447                         ast_copy_string(buf, "0", len);
448                 return 0;
449         }
450
451         /* Make sure we have a speech structure for everything else */
452         if (speech == NULL) {
453                 return -1;
454         }
455
456         /* Check to see if they are checking for silence */
457         if (!strcasecmp(data, "spoke")) {
458                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
459                         ast_copy_string(buf, "1", len);
460                 else
461                         ast_copy_string(buf, "0", len);
462         } else if (!strcasecmp(data, "results")) {
463                 /* Count number of results */
464                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
465                         results++;
466                 snprintf(tmp, sizeof(tmp), "%d", results);
467                 ast_copy_string(buf, tmp, len);
468         } else {
469                 buf[0] = '\0';
470         }
471
472         return 0;
473 }
474
475 static struct ast_custom_function speech_function = {
476         .name = "SPEECH",
477         .read = speech_read,
478         .write = NULL,
479 };
480
481
482
483 /*! \brief SpeechCreate() Dialplan Application */
484 static int speech_create(struct ast_channel *chan, void *data)
485 {
486         struct ast_speech *speech = NULL;
487         struct ast_datastore *datastore = NULL;
488
489         /* Request a speech object */
490         speech = ast_speech_new(data, chan->nativeformats);
491         if (speech == NULL) {
492                 /* Not available */
493                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
494                 return 0;
495         }
496
497         datastore = ast_datastore_alloc(&speech_datastore, NULL);
498         if (datastore == NULL) {
499                 ast_speech_destroy(speech);
500                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
501                 return 0;
502         }
503         datastore->data = speech;
504         ast_channel_datastore_add(chan, datastore);
505
506         return 0;
507 }
508
509 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
510 static int speech_load(struct ast_channel *chan, void *vdata)
511 {
512         int res = 0;
513         struct ast_speech *speech = find_speech(chan);
514         char *data;
515         AST_DECLARE_APP_ARGS(args,
516                 AST_APP_ARG(grammar);
517                 AST_APP_ARG(path);
518         );
519
520         data = ast_strdupa(vdata);
521         AST_STANDARD_APP_ARGS(args, data);
522
523         if (speech == NULL)
524                 return -1;
525
526         if (args.argc != 2)
527                 return -1;
528
529         /* Load the grammar locally on the object */
530         res = ast_speech_grammar_load(speech, args.grammar, args.path);
531
532         return res;
533 }
534
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) dialplan application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to unload
 *
 * \return The result of ast_speech_grammar_unload(), or -1 when the channel
 *         has no speech structure.
 */
static int speech_unload(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_unload(speech, data);
}
549
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) dialplan application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to deactivate
 *
 * \return The result of ast_speech_grammar_deactivate(), or -1 when the
 *         channel has no speech structure.
 */
static int speech_deactivate(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_deactivate(speech, data);
}
564
/*!
 * \brief SpeechActivateGrammar(Grammar Name) dialplan application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to activate
 *
 * \return The result of ast_speech_grammar_activate(), or -1 when the channel
 *         has no speech structure.
 */
static int speech_activate(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_activate(speech, data);
}
579
/*!
 * \brief SpeechStart() dialplan application
 *
 * \param chan Channel carrying the speech structure
 * \param data Application arguments (unused)
 *
 * \retval 0  ast_speech_start() was called (its result is not checked)
 * \retval -1 the channel has no speech structure
 */
static int speech_start(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	ast_speech_start(speech);

	return 0;
}
593
594 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
595 static int speech_processing_sound(struct ast_channel *chan, void *data)
596 {
597         int res = 0;
598         struct ast_speech *speech = find_speech(chan);
599
600         if (speech == NULL)
601                 return -1;
602
603         if (speech->processing_sound != NULL) {
604                 ast_free(speech->processing_sound);
605                 speech->processing_sound = NULL;
606         }
607
608         speech->processing_sound = ast_strdup(data);
609
610         return res;
611 }
612
/*!
 * \brief Start playback of a sound file on a channel (used by speech_background)
 *
 * \param chan     Channel to play on
 * \param filename Sound file to open
 * \param preflang Preferred language passed to ast_openstream()
 *
 * \retval 0  stream opened, applied and playback started
 * \retval -1 the stream could not be opened or applied
 *
 * This only starts playback; it does not wait for it to finish.
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
	struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

	if (!stream || ast_applystream(chan, stream)) {
		return -1;
	}

	ast_playstream(stream);

	return 0;
}
628
/*! \brief Option flags understood by SpeechBackground() */
enum {
	/*! Set by the 'n' option: do not answer a channel that is not yet up */
	SB_OPT_NOANSWER = 1 << 0,
};
632
633 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
634         AST_APP_OPTION('n', SB_OPT_NOANSWER),
635 END_OPTIONS );
636
637 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
638 static int speech_background(struct ast_channel *chan, void *data)
639 {
640         unsigned int timeout = 0;
641         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
642         struct ast_speech *speech = find_speech(chan);
643         struct ast_frame *f = NULL;
644         int oldreadformat = AST_FORMAT_SLINEAR;
645         char dtmf[AST_MAX_EXTENSION] = "";
646         struct timeval start = { 0, 0 }, current;
647         struct ast_datastore *datastore = NULL;
648         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
649         const char *tmp2 = NULL;
650         struct ast_flags options = { 0 };
651         AST_DECLARE_APP_ARGS(args,
652                 AST_APP_ARG(soundfile);
653                 AST_APP_ARG(timeout);
654                 AST_APP_ARG(options);
655         );
656
657         parse = ast_strdupa(data);
658         AST_STANDARD_APP_ARGS(args, parse);
659
660         if (speech == NULL)
661                 return -1;
662
663         if (!ast_strlen_zero(args.options)) {
664                 char *options_buf = ast_strdupa(args.options);
665                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
666         }
667
668         /* If channel is not already answered, then answer it */
669         if (chan->_state != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
670                 && ast_answer(chan)) {
671                         return -1;
672         }
673
674         /* Record old read format */
675         oldreadformat = chan->readformat;
676
677         /* Change read format to be signed linear */
678         if (ast_set_read_format(chan, speech->format))
679                 return -1;
680
681         if (!ast_strlen_zero(args.soundfile)) {
682                 /* Yay sound file */
683                 filename_tmp = ast_strdupa(args.soundfile);
684                 if (!ast_strlen_zero(args.timeout)) {
685                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
686                                 timeout = -1;
687                 } else
688                         timeout = 0;
689         }
690
691         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
692         ast_channel_lock(chan);
693         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
694                 max_dtmf_len = atoi(tmp2);
695         }
696         
697         /* See if a terminator is specified */
698         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
699                 if (ast_strlen_zero(tmp2))
700                         dtmf_terminator = '\0';
701                 else
702                         dtmf_terminator = tmp2[0];
703         }
704         ast_channel_unlock(chan);
705
706         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
707         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
708                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
709                 ast_speech_start(speech);
710         }
711
712         /* Ensure no streams are currently running */
713         ast_stopstream(chan);
714
715         /* Okay it's streaming so go into a loop grabbing frames! */
716         while (done == 0) {
717                 /* If the filename is null and stream is not running, start up a new sound file */
718                 if (!quieted && (chan->streamid == -1 && chan->timingfunc == NULL) && (filename = strsep(&filename_tmp, "&"))) {
719                         /* Discard old stream information */
720                         ast_stopstream(chan);
721                         /* Start new stream */
722                         speech_streamfile(chan, filename, chan->language);
723                 }
724
725                 /* Run scheduled stuff */
726                 ast_sched_runq(chan->sched);
727
728                 /* Yay scheduling */
729                 res = ast_sched_wait(chan->sched);
730                 if (res < 0)
731                         res = 1000;
732
733                 /* If there is a frame waiting, get it - if not - oh well */
734                 if (ast_waitfor(chan, res) > 0) {
735                         f = ast_read(chan);
736                         if (f == NULL) {
737                                 /* The channel has hung up most likely */
738                                 done = 3;
739                                 break;
740                         }
741                 }
742
743                 /* Do timeout check (shared between audio/dtmf) */
744                 if ((!quieted || strlen(dtmf)) && started == 1) {
745                         current = ast_tvnow();
746                         if ((ast_tvdiff_ms(start, current)) >= timeout) {
747                                 done = 1;
748                                 if (f)
749                                         ast_frfree(f);
750                                 break;
751                         }
752                 }
753
754                 /* Do checks on speech structure to see if it's changed */
755                 ast_mutex_lock(&speech->lock);
756                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
757                         if (chan->stream)
758                                 ast_stopstream(chan);
759                         ast_clear_flag(speech, AST_SPEECH_QUIET);
760                         quieted = 1;
761                 }
762                 /* Check state so we can see what to do */
763                 switch (speech->state) {
764                 case AST_SPEECH_STATE_READY:
765                         /* If audio playback has stopped do a check for timeout purposes */
766                         if (chan->streamid == -1 && chan->timingfunc == NULL)
767                                 ast_stopstream(chan);
768                         if (!quieted && chan->stream == NULL && timeout && started == 0 && !filename_tmp) {
769                                 if (timeout == -1) {
770                                         done = 1;
771                                         if (f)
772                                                 ast_frfree(f);
773                                         break;
774                                 }
775                                 start = ast_tvnow();
776                                 started = 1;
777                         }
778                         /* Write audio frame out to speech engine if no DTMF has been received */
779                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
780                                 ast_speech_write(speech, f->data.ptr, f->datalen);
781                         }
782                         break;
783                 case AST_SPEECH_STATE_WAIT:
784                         /* Cue up waiting sound if not already playing */
785                         if (!strlen(dtmf)) {
786                                 if (chan->stream == NULL) {
787                                         if (speech->processing_sound != NULL) {
788                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
789                                                         speech_streamfile(chan, speech->processing_sound, chan->language);
790                                                 }
791                                         }
792                                 } else if (chan->streamid == -1 && chan->timingfunc == NULL) {
793                                         ast_stopstream(chan);
794                                         if (speech->processing_sound != NULL) {
795                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
796                                                         speech_streamfile(chan, speech->processing_sound, chan->language);
797                                                 }
798                                         }
799                                 }
800                         }
801                         break;
802                 case AST_SPEECH_STATE_DONE:
803                         /* Now that we are done... let's switch back to not ready state */
804                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
805                         if (!strlen(dtmf)) {
806                                 /* Copy to speech structure the results, if available */
807                                 speech->results = ast_speech_results_get(speech);
808                                 /* Break out of our background too */
809                                 done = 1;
810                                 /* Stop audio playback */
811                                 if (chan->stream != NULL) {
812                                         ast_stopstream(chan);
813                                 }
814                         }
815                         break;
816                 default:
817                         break;
818                 }
819                 ast_mutex_unlock(&speech->lock);
820
821                 /* Deal with other frame types */
822                 if (f != NULL) {
823                         /* Free the frame we received */
824                         switch (f->frametype) {
825                         case AST_FRAME_DTMF:
826                                 if (dtmf_terminator != '\0' && f->subclass == dtmf_terminator) {
827                                         done = 1;
828                                 } else {
829                                         if (chan->stream != NULL) {
830                                                 ast_stopstream(chan);
831                                         }
832                                         if (!started) {
833                                                 /* Change timeout to be 5 seconds for DTMF input */
834                                                 timeout = (chan->pbx && chan->pbx->dtimeoutms) ? chan->pbx->dtimeoutms : 5000;
835                                                 started = 1;
836                                         }
837                                         start = ast_tvnow();
838                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass);
839                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
840                                         /* If the maximum length of the DTMF has been reached, stop now */
841                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
842                                                 done = 1;
843                                 }
844                                 break;
845                         case AST_FRAME_CONTROL:
846                                 switch (f->subclass) {
847                                 case AST_CONTROL_HANGUP:
848                                         /* Since they hung up we should destroy the speech structure */
849                                         done = 3;
850                                 default:
851                                         break;
852                                 }
853                         default:
854                                 break;
855                         }
856                         ast_frfree(f);
857                         f = NULL;
858                 }
859         }
860
861         if (!ast_strlen_zero(dtmf)) {
862                 /* We sort of make a results entry */
863                 speech->results = ast_calloc(1, sizeof(*speech->results));
864                 if (speech->results != NULL) {
865                         ast_speech_dtmf(speech, dtmf);
866                         speech->results->score = 1000;
867                         speech->results->text = ast_strdup(dtmf);
868                         speech->results->grammar = ast_strdup("dtmf");
869                 }
870                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
871         }
872
873         /* See if it was because they hung up */
874         if (done == 3) {
875                 /* Destroy speech structure */
876                 ast_speech_destroy(speech);
877                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
878                 if (datastore != NULL)
879                         ast_channel_datastore_remove(chan, datastore);
880         } else {
881                 /* Channel is okay so restore read format */
882                 ast_set_read_format(chan, oldreadformat);
883         }
884
885         return 0;
886 }
887
888
889 /*! \brief SpeechDestroy() Dialplan Application */
890 static int speech_destroy(struct ast_channel *chan, void *data)
891 {
892         int res = 0;
893         struct ast_speech *speech = find_speech(chan);
894         struct ast_datastore *datastore = NULL;
895
896         if (speech == NULL)
897                 return -1;
898
899         /* Destroy speech structure */
900         ast_speech_destroy(speech);
901
902         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
903         if (datastore != NULL) {
904                 ast_channel_datastore_remove(chan, datastore);
905         }
906
907         return res;
908 }
909
910 static int unload_module(void)
911 {
912         int res = 0;
913
914         res = ast_unregister_application("SpeechCreate");
915         res |= ast_unregister_application("SpeechLoadGrammar");
916         res |= ast_unregister_application("SpeechUnloadGrammar");
917         res |= ast_unregister_application("SpeechActivateGrammar");
918         res |= ast_unregister_application("SpeechDeactivateGrammar");
919         res |= ast_unregister_application("SpeechStart");
920         res |= ast_unregister_application("SpeechBackground");
921         res |= ast_unregister_application("SpeechDestroy");
922         res |= ast_unregister_application("SpeechProcessingSound");
923         res |= ast_custom_function_unregister(&speech_function);
924         res |= ast_custom_function_unregister(&speech_score_function);
925         res |= ast_custom_function_unregister(&speech_text_function);
926         res |= ast_custom_function_unregister(&speech_grammar_function);
927         res |= ast_custom_function_unregister(&speech_engine_function);
928         res |= ast_custom_function_unregister(&speech_results_type_function);
929
930         return res;     
931 }
932
933 static int load_module(void)
934 {
935         int res = 0;
936
937         res = ast_register_application_xml("SpeechCreate", speech_create);
938         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
939         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
940         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
941         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
942         res |= ast_register_application_xml("SpeechStart", speech_start);
943         res |= ast_register_application_xml("SpeechBackground", speech_background);
944         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
945         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
946         res |= ast_custom_function_register(&speech_function);
947         res |= ast_custom_function_register(&speech_score_function);
948         res |= ast_custom_function_register(&speech_text_function);
949         res |= ast_custom_function_register(&speech_grammar_function);
950         res |= ast_custom_function_register(&speech_engine_function);
951         res |= ast_custom_function_register(&speech_results_type_function);
952
953         return res;
954 }
955
/* Module information for this file: passes ASTERISK_GPL_KEY and the
 * description "Dialplan Speech Applications" to AST_MODULE_INFO_STANDARD.
 * NOTE(review): presumably this macro hooks up the load_module() and
 * unload_module() functions defined above as the module's entry points --
 * confirm against asterisk/module.h. */
956 AST_MODULE_INFO_STANDARD(ASTERISK_GPL_KEY, "Dialplan Speech Applications");