d94603837ee652ffeba4473447efc73b6f4ab8b9
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30         <depend>res_speech</depend>
31  ***/
32
33 #include "asterisk.h"
34
35 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
36
37 #include "asterisk/file.h"
38 #include "asterisk/channel.h"
39 #include "asterisk/pbx.h"
40 #include "asterisk/module.h"
41 #include "asterisk/lock.h"
42 #include "asterisk/app.h"
43 #include "asterisk/speech.h"
44
45 /*** DOCUMENTATION
46         <application name="SpeechCreate" language="en_US">
47                 <synopsis>
48                         Create a Speech Structure.
49                 </synopsis>
50                 <syntax>
51                         <parameter name="engine_name" required="true" />
52                 </syntax>
53                 <description>
54                         <para>This application creates information to be used by all the other applications.
55                         It must be called before doing any speech recognition activities such as activating a grammar.
56                         It takes the engine name to use as the argument; if none is specified, the default engine is used.</para>
57                         <para>Sets the ERROR channel variable to 1 if the engine cannot be used.</para>
58                 </description>
59         </application>
60         <application name="SpeechActivateGrammar" language="en_US">
61                 <synopsis>
62                         Activate a grammar.
63                 </synopsis>
64                 <syntax>
65                         <parameter name="grammar_name" required="true" />
66                 </syntax>
67                 <description>
68                         <para>This activates the specified grammar to be recognized by the engine.
69                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
70                         in the dialplan. The grammar name is the only argument to this application.</para>
71                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
72                 </description>
73         </application>
74         <application name="SpeechStart" language="en_US">
75                 <synopsis>
76                         Start recognizing voice in the audio stream.
77                 </synopsis>
78                 <syntax />
79                 <description>
80                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
81                         fed to it.</para>
82                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
83                 </description>
84         </application>
85         <application name="SpeechBackground" language="en_US">
86                 <synopsis>
87                         Play a sound file and wait for speech to be recognized.
88                 </synopsis>
89                 <syntax>
90                         <parameter name="sound_file" required="true" />
91                         <parameter name="timeout">
92                                 <para>Timeout integer in seconds. Note the timeout will only start
93                                 once the sound file has stopped playing.</para>
94                         </parameter>
95                         <parameter name="options">
96                                 <optionlist>
97                                         <option name="n">
98                                                 <para>Don't answer the channel if it has not already been answered.</para>
99                                         </option>
100                                 </optionlist>
101                         </parameter>
102                 </syntax>
103                 <description>
104                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
105                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
106                         the speech recognition engine is working. Once results are available the application returns and results
107                         (score and text) are available using dialplan functions.</para>
108                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)}, while the second are ${SPEECH_TEXT(1)}
109                         and ${SPEECH_SCORE(1)}.</para>
110                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
111                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
112                         
113                 </description>
114         </application>
115         <application name="SpeechDeactivateGrammar" language="en_US">
116                 <synopsis>
117                         Deactivate a grammar.
118                 </synopsis>
119                 <syntax>
120                         <parameter name="grammar_name" required="true">
121                                 <para>The grammar name to deactivate</para>
122                         </parameter>
123                 </syntax>
124                 <description>
125                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
126                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
127                 </description>
128         </application>
129         <application name="SpeechProcessingSound" language="en_US">
130                 <synopsis>
131                         Change background processing sound.
132                 </synopsis>
133                 <syntax>
134                         <parameter name="sound_file" required="true" />
135                 </syntax>
136                 <description>
137                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
138                         processing and working to get results.</para>
139                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
140                 </description>
141         </application>
142         <application name="SpeechDestroy" language="en_US">
143                 <synopsis>
144                         End speech recognition.
145                 </synopsis>
146                 <syntax />
147                 <description>
148                         <para>This destroys the information used by all the other speech recognition applications.
149                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
150                         again before calling any other application.</para>
151                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
152                 </description>
153         </application>
154         <application name="SpeechLoadGrammar" language="en_US">
155                 <synopsis>
156                         Load a grammar.
157                 </synopsis>
158                 <syntax>
159                         <parameter name="grammar_name" required="true" />
160                         <parameter name="path" required="true" />
161                 </syntax>
162                 <description>
163                         <para>Load a grammar only on the channel, not globally.</para>
164                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
165                 </description>
166         </application>
167         <application name="SpeechUnloadGrammar" language="en_US">
168                 <synopsis>
169                         Unload a grammar.
170                 </synopsis>
171                 <syntax>
172                         <parameter name="grammar_name" required="true" />
173                 </syntax>
174                 <description>
175                         <para>Unload a grammar.</para>
176                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
177                 </description>
178         </application>
179         <function name="SPEECH_SCORE" language="en_US">
180                 <synopsis>
181                         Gets the confidence score of a result.
182                 </synopsis>
183                 <syntax argsep="/">
184                         <parameter name="nbest_number" />
185                         <parameter name="result_number" required="true" />
186                 </syntax>
187                 <description>
188                         <para>Gets the confidence score of a result.</para>
189                 </description>
190         </function>
191         <function name="SPEECH_TEXT" language="en_US">
192                 <synopsis>
193                         Gets the recognized text of a result.
194                 </synopsis>
195                 <syntax argsep="/">
196                         <parameter name="nbest_number" />
197                         <parameter name="result_number" required="true" />
198                 </syntax>
199                 <description>
200                         <para>Gets the recognized text of a result.</para>
201                 </description>
202         </function>
203         <function name="SPEECH_GRAMMAR" language="en_US">
204                 <synopsis>
205                         Gets the matched grammar of a result if available.
206                 </synopsis>
207                 <syntax argsep="/">
208                         <parameter name="nbest_number" />
209                         <parameter name="result_number" required="true" />
210                 </syntax>
211                 <description>
212                         <para>Gets the matched grammar of a result if available.</para>
213                 </description>
214         </function>
215         <function name="SPEECH_ENGINE" language="en_US">
216                 <synopsis>
217                         Get or change a speech engine specific attribute.
218                 </synopsis>
219                 <syntax>
220                         <parameter name="name" required="true" />
221                 </syntax>
222                 <description>
223                         <para>Gets or changes a speech engine specific attribute.</para>
224                 </description>
225         </function>
226         <function name="SPEECH_RESULTS_TYPE" language="en_US">
227                 <synopsis>
228                         Sets the type of results that will be returned.
229                 </synopsis>
230                 <syntax />
231                 <description>
232                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
233                 </description>
234         </function>
235         <function name="SPEECH" language="en_US">
236                 <synopsis>
237                         Gets information about speech recognition results.
238                 </synopsis>
239                 <syntax>
240                         <parameter name="argument" required="true">
241                                 <enumlist>
242                                         <enum name="status">
243                                                 <para>Returns <literal>1</literal> upon speech object existing,
244                                                 or <literal>0</literal> if not</para>
245                                         </enum>
246                                         <enum name="spoke">
247                                                 <para>Returns <literal>1</literal> if the speaker spoke,
248                                                 or <literal>0</literal> if not</para>
249                                         </enum>
250                                         <enum name="results">
251                                                 <para>Returns number of results that were recognized.</para>
252                                         </enum>
253                                 </enumlist>
254                         </parameter>
255                 </syntax>
256                 <description>
257                         <para>Gets information about speech recognition results.</para>
258                 </description>
259         </function>
260  ***/
261
/*!
 * \brief Datastore destructor that releases the attached speech structure.
 *
 * Installed as the \c destroy hook of the speech datastore descriptor.
 *
 * \param data Speech structure stored in the datastore; may be NULL.
 */
static void destroy_callback(void *data)
{
        struct ast_speech *attached = data;

        /* A NULL payload means there is nothing to release */
        if (attached) {
                ast_speech_destroy(attached);
        }
}
276
277 /*! \brief Static structure for datastore information */
278 static const struct ast_datastore_info speech_datastore = {
279         .type = "speech",
280         .destroy = destroy_callback
281 };
282
283 /*! \brief Helper function used to find the speech structure attached to a channel */
284 static struct ast_speech *find_speech(struct ast_channel *chan)
285 {
286         struct ast_speech *speech = NULL;
287         struct ast_datastore *datastore = NULL;
288
289         if (!chan) {
290                 return NULL;
291         }
292
293         ast_channel_lock(chan);
294         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
295         ast_channel_unlock(chan);
296         if (datastore == NULL) {
297                 return NULL;
298         }
299         speech = datastore->data;
300
301         return speech;
302 }
303
304 /*!
305  * \internal
306  * \brief Destroy the speech datastore on the given channel.
307  *
308  * \param chan Channel to destroy speech datastore.
309  *
310  * \retval 0 on success.
311  * \retval -1 not found.
312  */
313 static int speech_datastore_destroy(struct ast_channel *chan)
314 {
315         struct ast_datastore *datastore;
316         int res;
317
318         ast_channel_lock(chan);
319         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
320         if (datastore) {
321                 ast_channel_datastore_remove(chan, datastore);
322         }
323         ast_channel_unlock(chan);
324         if (datastore) {
325                 ast_datastore_free(datastore);
326                 res = 0;
327         } else {
328                 res = -1;
329         }
330         return res;
331 }
332
333 /* Helper function to find a specific speech recognition result by number and nbest alternative */
334 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
335 {
336         struct ast_speech_result *result = results;
337         char *tmp = NULL;
338         int nbest_num = 0, wanted_num = 0, i = 0;
339
340         if (!result) {
341                 return NULL;
342         }
343
344         if ((tmp = strchr(result_num, '/'))) {
345                 *tmp++ = '\0';
346                 nbest_num = atoi(result_num);
347                 wanted_num = atoi(tmp);
348         } else {
349                 wanted_num = atoi(result_num);
350         }
351
352         do {
353                 if (result->nbest_num != nbest_num)
354                         continue;
355                 if (i == wanted_num)
356                         break;
357                 i++;
358         } while ((result = AST_LIST_NEXT(result, list)));
359
360         return result;
361 }
362
363 /*! \brief SPEECH_SCORE() Dialplan Function */
364 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
365                        char *buf, size_t len)
366 {
367         struct ast_speech_result *result = NULL;
368         struct ast_speech *speech = find_speech(chan);
369         char tmp[128] = "";
370
371         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
372                 return -1;
373         }
374         
375         snprintf(tmp, sizeof(tmp), "%d", result->score);
376         
377         ast_copy_string(buf, tmp, len);
378
379         return 0;
380 }
381
382 static struct ast_custom_function speech_score_function = {
383         .name = "SPEECH_SCORE",
384         .read = speech_score,
385         .write = NULL,
386 };
387
388 /*! \brief SPEECH_TEXT() Dialplan Function */
389 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
390                         char *buf, size_t len)
391 {
392         struct ast_speech_result *result = NULL;
393         struct ast_speech *speech = find_speech(chan);
394
395         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
396                 return -1;
397         }
398
399         if (result->text != NULL) {
400                 ast_copy_string(buf, result->text, len);
401         } else {
402                 buf[0] = '\0';
403         }
404
405         return 0;
406 }
407
408 static struct ast_custom_function speech_text_function = {
409         .name = "SPEECH_TEXT",
410         .read = speech_text,
411         .write = NULL,
412 };
413
414 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
415 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
416                         char *buf, size_t len)
417 {
418         struct ast_speech_result *result = NULL;
419         struct ast_speech *speech = find_speech(chan);
420
421         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
422                 return -1;
423         }
424
425         if (result->grammar != NULL) {
426                 ast_copy_string(buf, result->grammar, len);
427         } else {
428                 buf[0] = '\0';
429         }
430
431         return 0;
432 }
433
434 static struct ast_custom_function speech_grammar_function = {
435         .name = "SPEECH_GRAMMAR",
436         .read = speech_grammar,
437         .write = NULL,
438 };
439
/*!
 * \brief SPEECH_ENGINE() dialplan write handler.
 *
 * Forwards the attribute name and value to ast_speech_change().
 *
 * \param chan  Channel carrying the speech structure.
 * \param cmd   Unused.
 * \param data  Name of the engine attribute.
 * \param value New value for the attribute.
 *
 * \retval 0 the change was handed to the engine.
 * \retval -1 \a data is NULL or no speech structure is attached to \a chan.
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
        struct ast_speech *speech = find_speech(chan);

        if (!data || !speech) {
                return -1;
        }

        /* The result of ast_speech_change() is not propagated to the caller */
        ast_speech_change(speech, data, value);
        return 0;
}
453
/*!
 * \brief SPEECH_ENGINE() dialplan read handler.
 *
 * \param chan Channel carrying the speech structure.
 * \param cmd  Unused.
 * \param data Name of the engine attribute to fetch.
 * \param buf  Output buffer.
 * \param len  Size of \a buf.
 *
 * \return The result of ast_speech_get_setting(), or -1 when \a data is
 *         NULL or no speech structure is attached to \a chan.
 */
static int speech_engine_read(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t len)
{
        struct ast_speech *speech = find_speech(chan);

        if (data != NULL && speech != NULL) {
                return ast_speech_get_setting(speech, data, buf, len);
        }

        return -1;
}
465
466 static struct ast_custom_function speech_engine_function = {
467         .name = "SPEECH_ENGINE",
468         .read = speech_engine_read,
469         .write = speech_engine_write,
470 };
471
472 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
473 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
474 {
475         struct ast_speech *speech = find_speech(chan);
476
477         if (data == NULL || speech == NULL)
478                 return -1;
479
480         if (!strcasecmp(value, "normal"))
481                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
482         else if (!strcasecmp(value, "nbest"))
483                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
484
485         return 0;
486 }
487
488 static struct ast_custom_function speech_results_type_function = {
489         .name = "SPEECH_RESULTS_TYPE",
490         .read = NULL,
491         .write = speech_results_type_write,
492 };
493
494 /*! \brief SPEECH() Dialplan Function */
495 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
496                         char *buf, size_t len)
497 {
498         int results = 0;
499         struct ast_speech_result *result = NULL;
500         struct ast_speech *speech = find_speech(chan);
501         char tmp[128] = "";
502
503         /* Now go for the various options */
504         if (!strcasecmp(data, "status")) {
505                 if (speech != NULL)
506                         ast_copy_string(buf, "1", len);
507                 else
508                         ast_copy_string(buf, "0", len);
509                 return 0;
510         }
511
512         /* Make sure we have a speech structure for everything else */
513         if (speech == NULL) {
514                 return -1;
515         }
516
517         /* Check to see if they are checking for silence */
518         if (!strcasecmp(data, "spoke")) {
519                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
520                         ast_copy_string(buf, "1", len);
521                 else
522                         ast_copy_string(buf, "0", len);
523         } else if (!strcasecmp(data, "results")) {
524                 /* Count number of results */
525                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
526                         results++;
527                 snprintf(tmp, sizeof(tmp), "%d", results);
528                 ast_copy_string(buf, tmp, len);
529         } else {
530                 buf[0] = '\0';
531         }
532
533         return 0;
534 }
535
536 static struct ast_custom_function speech_function = {
537         .name = "SPEECH",
538         .read = speech_read,
539         .write = NULL,
540 };
541
542
543
544 /*! \brief SpeechCreate() Dialplan Application */
545 static int speech_create(struct ast_channel *chan, const char *data)
546 {
547         struct ast_speech *speech = NULL;
548         struct ast_datastore *datastore = NULL;
549
550         /* Request a speech object */
551         speech = ast_speech_new(data, ast_channel_nativeformats(chan));
552         if (speech == NULL) {
553                 /* Not available */
554                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
555                 return 0;
556         }
557
558         datastore = ast_datastore_alloc(&speech_datastore, NULL);
559         if (datastore == NULL) {
560                 ast_speech_destroy(speech);
561                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
562                 return 0;
563         }
564         pbx_builtin_setvar_helper(chan, "ERROR", NULL);
565         datastore->data = speech;
566         ast_channel_lock(chan);
567         ast_channel_datastore_add(chan, datastore);
568         ast_channel_unlock(chan);
569
570         return 0;
571 }
572
573 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
574 static int speech_load(struct ast_channel *chan, const char *vdata)
575 {
576         int res = 0;
577         struct ast_speech *speech = find_speech(chan);
578         char *data;
579         AST_DECLARE_APP_ARGS(args,
580                 AST_APP_ARG(grammar);
581                 AST_APP_ARG(path);
582         );
583
584         data = ast_strdupa(vdata);
585         AST_STANDARD_APP_ARGS(args, data);
586
587         if (speech == NULL)
588                 return -1;
589
590         if (args.argc != 2)
591                 return -1;
592
593         /* Load the grammar locally on the object */
594         res = ast_speech_grammar_load(speech, args.grammar, args.path);
595
596         return res;
597 }
598
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) dialplan application.
 *
 * \param chan Channel carrying the speech structure.
 * \param data Name of the grammar to unload.
 *
 * \return The result of ast_speech_grammar_unload(), or -1 when no speech
 *         structure is attached to \a chan.
 */
static int speech_unload(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        return speech ? ast_speech_grammar_unload(speech, data) : -1;
}
613
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) dialplan application.
 *
 * \param chan Channel carrying the speech structure.
 * \param data Name of the grammar to deactivate.
 *
 * \return The result of ast_speech_grammar_deactivate(), or -1 when no
 *         speech structure is attached to \a chan.
 */
static int speech_deactivate(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        return speech ? ast_speech_grammar_deactivate(speech, data) : -1;
}
628
/*!
 * \brief SpeechActivateGrammar(Grammar Name) dialplan application.
 *
 * \param chan Channel carrying the speech structure.
 * \param data Name of the grammar to activate.
 *
 * \return The result of ast_speech_grammar_activate(), or -1 when no
 *         speech structure is attached to \a chan.
 */
static int speech_activate(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        return speech ? ast_speech_grammar_activate(speech, data) : -1;
}
643
/*!
 * \brief SpeechStart() dialplan application.
 *
 * Calls ast_speech_start() on the channel's speech structure.
 *
 * \param chan Channel carrying the speech structure.
 * \param data Unused.
 *
 * \retval 0 ast_speech_start() was called.
 * \retval -1 no speech structure is attached to \a chan.
 */
static int speech_start(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        if (!speech) {
                return -1;
        }

        /* The result of ast_speech_start() is not examined here */
        ast_speech_start(speech);
        return 0;
}
657
658 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
659 static int speech_processing_sound(struct ast_channel *chan, const char *data)
660 {
661         int res = 0;
662         struct ast_speech *speech = find_speech(chan);
663
664         if (speech == NULL)
665                 return -1;
666
667         if (speech->processing_sound != NULL) {
668                 ast_free(speech->processing_sound);
669                 speech->processing_sound = NULL;
670         }
671
672         speech->processing_sound = ast_strdup(data);
673
674         return res;
675 }
676
/*!
 * \brief Open a sound file on a channel and start playing it.
 *
 * Used by speech_background() to start each prompt file.
 *
 * \param chan     Channel to play the file on.
 * \param filename Name of the sound file to open.
 * \param preflang Preferred language passed to ast_openstream().
 *
 * \retval 0 the stream was opened, applied and handed to ast_playstream().
 * \retval -1 the stream could not be opened or applied to the channel.
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
        struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

        if (stream == NULL || ast_applystream(chan, stream) != 0) {
                return -1;
        }

        ast_playstream(stream);
        return 0;
}
692
693 enum {
694         SB_OPT_NOANSWER = (1 << 0),
695 };
696
697 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
698         AST_APP_OPTION('n', SB_OPT_NOANSWER),
699 END_OPTIONS );
700
701 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
702 static int speech_background(struct ast_channel *chan, const char *data)
703 {
704         unsigned int timeout = 0;
705         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
706         struct ast_speech *speech = find_speech(chan);
707         struct ast_frame *f = NULL;
708         RAII_VAR(struct ast_format *, oldreadformat, NULL, ao2_cleanup);
709         char dtmf[AST_MAX_EXTENSION] = "";
710         struct timeval start = { 0, 0 }, current;
711         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
712         const char *tmp2 = NULL;
713         struct ast_flags options = { 0 };
714         AST_DECLARE_APP_ARGS(args,
715                 AST_APP_ARG(soundfile);
716                 AST_APP_ARG(timeout);
717                 AST_APP_ARG(options);
718         );
719
720         parse = ast_strdupa(data);
721         AST_STANDARD_APP_ARGS(args, parse);
722
723         if (speech == NULL)
724                 return -1;
725
726         if (!ast_strlen_zero(args.options)) {
727                 char *options_buf = ast_strdupa(args.options);
728                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
729         }
730
731         /* If channel is not already answered, then answer it */
732         if (ast_channel_state(chan) != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
733                 && ast_answer(chan)) {
734                         return -1;
735         }
736
737         /* Record old read format */
738         oldreadformat = ao2_bump(ast_channel_readformat(chan));
739
740         /* Change read format to be signed linear */
741         if (ast_set_read_format(chan, speech->format))
742                 return -1;
743
744         if (!ast_strlen_zero(args.soundfile)) {
745                 /* Yay sound file */
746                 filename_tmp = ast_strdupa(args.soundfile);
747                 if (!ast_strlen_zero(args.timeout)) {
748                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
749                                 timeout = -1;
750                 } else
751                         timeout = 0;
752         }
753
754         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
755         ast_channel_lock(chan);
756         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
757                 max_dtmf_len = atoi(tmp2);
758         }
759         
760         /* See if a terminator is specified */
761         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
762                 if (ast_strlen_zero(tmp2))
763                         dtmf_terminator = '\0';
764                 else
765                         dtmf_terminator = tmp2[0];
766         }
767         ast_channel_unlock(chan);
768
769         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
770         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
771                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
772                 ast_speech_start(speech);
773         }
774
775         /* Ensure no streams are currently running */
776         ast_stopstream(chan);
777
778         /* Okay it's streaming so go into a loop grabbing frames! */
779         while (done == 0) {
780                 /* If the filename is null and stream is not running, start up a new sound file */
781                 if (!quieted && (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) && (filename = strsep(&filename_tmp, "&"))) {
782                         /* Discard old stream information */
783                         ast_stopstream(chan);
784                         /* Start new stream */
785                         speech_streamfile(chan, filename, ast_channel_language(chan));
786                 }
787
788                 /* Run scheduled stuff */
789                 ast_sched_runq(ast_channel_sched(chan));
790
791                 /* Yay scheduling */
792                 res = ast_sched_wait(ast_channel_sched(chan));
793                 if (res < 0)
794                         res = 1000;
795
796                 /* If there is a frame waiting, get it - if not - oh well */
797                 if (ast_waitfor(chan, res) > 0) {
798                         f = ast_read(chan);
799                         if (f == NULL) {
800                                 /* The channel has hung up most likely */
801                                 done = 3;
802                                 break;
803                         }
804                 }
805
806                 /* Do timeout check (shared between audio/dtmf) */
807                 if ((!quieted || strlen(dtmf)) && started == 1) {
808                         current = ast_tvnow();
809                         if ((ast_tvdiff_ms(current, start)) >= timeout) {
810                                 done = 1;
811                                 if (f)
812                                         ast_frfree(f);
813                                 break;
814                         }
815                 }
816
817                 /* Do checks on speech structure to see if it's changed */
818                 ast_mutex_lock(&speech->lock);
819                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
820                         if (ast_channel_stream(chan))
821                                 ast_stopstream(chan);
822                         ast_clear_flag(speech, AST_SPEECH_QUIET);
823                         quieted = 1;
824                 }
825                 /* Check state so we can see what to do */
826                 switch (speech->state) {
827                 case AST_SPEECH_STATE_READY:
828                         /* If audio playback has stopped do a check for timeout purposes */
829                         if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL)
830                                 ast_stopstream(chan);
831                         if (!quieted && ast_channel_stream(chan) == NULL && timeout && started == 0 && !filename_tmp) {
832                                 if (timeout == -1) {
833                                         done = 1;
834                                         if (f)
835                                                 ast_frfree(f);
836                                         break;
837                                 }
838                                 start = ast_tvnow();
839                                 started = 1;
840                         }
841                         /* Write audio frame out to speech engine if no DTMF has been received */
842                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
843                                 ast_speech_write(speech, f->data.ptr, f->datalen);
844                         }
845                         break;
846                 case AST_SPEECH_STATE_WAIT:
847                         /* Cue up waiting sound if not already playing */
848                         if (!strlen(dtmf)) {
849                                 if (ast_channel_stream(chan) == NULL) {
850                                         if (speech->processing_sound != NULL) {
851                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
852                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
853                                                 }
854                                         }
855                                 } else if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) {
856                                         ast_stopstream(chan);
857                                         if (speech->processing_sound != NULL) {
858                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
859                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
860                                                 }
861                                         }
862                                 }
863                         }
864                         break;
865                 case AST_SPEECH_STATE_DONE:
866                         /* Now that we are done... let's switch back to not ready state */
867                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
868                         if (!strlen(dtmf)) {
869                                 /* Copy to speech structure the results, if available */
870                                 speech->results = ast_speech_results_get(speech);
871                                 /* Break out of our background too */
872                                 done = 1;
873                                 /* Stop audio playback */
874                                 if (ast_channel_stream(chan) != NULL) {
875                                         ast_stopstream(chan);
876                                 }
877                         }
878                         break;
879                 default:
880                         break;
881                 }
882                 ast_mutex_unlock(&speech->lock);
883
884                 /* Deal with other frame types */
885                 if (f != NULL) {
886                         /* Free the frame we received */
887                         switch (f->frametype) {
888                         case AST_FRAME_DTMF:
889                                 if (dtmf_terminator != '\0' && f->subclass.integer == dtmf_terminator) {
890                                         done = 1;
891                                 } else {
892                                         quieted = 1;
893                                         if (ast_channel_stream(chan) != NULL) {
894                                                 ast_stopstream(chan);
895                                         }
896                                         if (!started) {
897                                                 /* Change timeout to be 5 seconds for DTMF input */
898                                                 timeout = (ast_channel_pbx(chan) && ast_channel_pbx(chan)->dtimeoutms) ? ast_channel_pbx(chan)->dtimeoutms : 5000;
899                                                 started = 1;
900                                         }
901                                         start = ast_tvnow();
902                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass.integer);
903                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
904                                         /* If the maximum length of the DTMF has been reached, stop now */
905                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
906                                                 done = 1;
907                                 }
908                                 break;
909                         case AST_FRAME_CONTROL:
910                                 switch (f->subclass.integer) {
911                                 case AST_CONTROL_HANGUP:
912                                         /* Since they hung up we should destroy the speech structure */
913                                         done = 3;
914                                 default:
915                                         break;
916                                 }
917                         default:
918                                 break;
919                         }
920                         ast_frfree(f);
921                         f = NULL;
922                 }
923         }
924
925         if (!ast_strlen_zero(dtmf)) {
926                 /* We sort of make a results entry */
927                 speech->results = ast_calloc(1, sizeof(*speech->results));
928                 if (speech->results != NULL) {
929                         ast_speech_dtmf(speech, dtmf);
930                         speech->results->score = 1000;
931                         speech->results->text = ast_strdup(dtmf);
932                         speech->results->grammar = ast_strdup("dtmf");
933                 }
934                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
935         }
936
937         /* See if it was because they hung up */
938         if (done == 3) {
939                 speech_datastore_destroy(chan);
940         } else {
941                 /* Channel is okay so restore read format */
942                 ast_set_read_format(chan, oldreadformat);
943         }
944
945         return 0;
946 }
947
948
/*!
 * \brief SpeechDestroy() Dialplan Application
 *
 * \param chan Channel handed to speech_datastore_destroy()
 * \param data Application argument string (not used by this application)
 *
 * \return -1 when no channel is supplied, otherwise whatever
 *         speech_datastore_destroy() returns for the channel
 */
static int speech_destroy(struct ast_channel *chan, const char *data)
{
        /* Without a channel there is nothing to hand to the destroy helper */
        return chan ? speech_datastore_destroy(chan) : -1;
}
957
958 static int unload_module(void)
959 {
960         int res = 0;
961
962         res = ast_unregister_application("SpeechCreate");
963         res |= ast_unregister_application("SpeechLoadGrammar");
964         res |= ast_unregister_application("SpeechUnloadGrammar");
965         res |= ast_unregister_application("SpeechActivateGrammar");
966         res |= ast_unregister_application("SpeechDeactivateGrammar");
967         res |= ast_unregister_application("SpeechStart");
968         res |= ast_unregister_application("SpeechBackground");
969         res |= ast_unregister_application("SpeechDestroy");
970         res |= ast_unregister_application("SpeechProcessingSound");
971         res |= ast_custom_function_unregister(&speech_function);
972         res |= ast_custom_function_unregister(&speech_score_function);
973         res |= ast_custom_function_unregister(&speech_text_function);
974         res |= ast_custom_function_unregister(&speech_grammar_function);
975         res |= ast_custom_function_unregister(&speech_engine_function);
976         res |= ast_custom_function_unregister(&speech_results_type_function);
977
978         return res;     
979 }
980
981 static int load_module(void)
982 {
983         int res = 0;
984
985         res = ast_register_application_xml("SpeechCreate", speech_create);
986         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
987         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
988         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
989         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
990         res |= ast_register_application_xml("SpeechStart", speech_start);
991         res |= ast_register_application_xml("SpeechBackground", speech_background);
992         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
993         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
994         res |= ast_custom_function_register(&speech_function);
995         res |= ast_custom_function_register(&speech_score_function);
996         res |= ast_custom_function_register(&speech_text_function);
997         res |= ast_custom_function_register(&speech_grammar_function);
998         res |= ast_custom_function_register(&speech_engine_function);
999         res |= ast_custom_function_register(&speech_results_type_function);
1000
1001         return res;
1002 }
1003
1004 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Dialplan Speech Applications",
1005                 .support_level = AST_MODULE_SUPPORT_CORE,
1006                 .load = load_module,
1007                 .unload = unload_module,
1008                 .nonoptreq = "res_speech",
1009                 );