Add support for retrieving engine specific settings using the speech API and from...
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30  ***/
31
32 #include "asterisk.h"
33
34 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
35
36 #include "asterisk/file.h"
37 #include "asterisk/channel.h"
38 #include "asterisk/pbx.h"
39 #include "asterisk/module.h"
40 #include "asterisk/lock.h"
41 #include "asterisk/app.h"
42 #include "asterisk/speech.h"
43
44 /*** DOCUMENTATION
45         <application name="SpeechCreate" language="en_US">
46                 <synopsis>
47                         Create a Speech Structure.
48                 </synopsis>
49                 <syntax>
50                         <parameter name="engine_name" required="true" />
51                 </syntax>
52                 <description>
53                         <para>This application creates information to be used by all the other applications.
54                         It must be called before doing any speech recognition activities such as activating a grammar.
55                         It takes the engine name to use as the argument; if not specified, the default engine will be used.</para>
56                         <para>Sets the ERROR channel variable to 1 if the engine cannot be used.</para>
57                 </description>
58         </application>
59         <application name="SpeechActivateGrammar" language="en_US">
60                 <synopsis>
61                         Activate a grammar.
62                 </synopsis>
63                 <syntax>
64                         <parameter name="grammar_name" required="true" />
65                 </syntax>
66                 <description>
67                         <para>This activates the specified grammar to be recognized by the engine.
68                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
69                         in the dialplan. The grammar name is the only argument to this application.</para>
70                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
71                 </description>
72         </application>
73         <application name="SpeechStart" language="en_US">
74                 <synopsis>
75                         Start recognizing voice in the audio stream.
76                 </synopsis>
77                 <syntax />
78                 <description>
79                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
80                         fed to it.</para>
81                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
82                 </description>
83         </application>
84         <application name="SpeechBackground" language="en_US">
85                 <synopsis>
86                         Play a sound file and wait for speech to be recognized.
87                 </synopsis>
88                 <syntax>
89                         <parameter name="sound_file" required="true" />
90                         <parameter name="timeout">
91                                 <para>Timeout integer in seconds. Note the timeout will only start
92                                 once the sound file has stopped playing.</para>
93                         </parameter>
94                         <parameter name="options">
95                                 <optionlist>
96                                         <option name="n">
97                                                 <para>Don't answer the channel if it has not already been answered.</para>
98                                         </option>
99                                 </optionlist>
100                         </parameter>
101                 </syntax>
102                 <description>
103                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
104                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
105                         the speech recognition engine is working. Once results are available the application returns and results
106                         (score and text) are available using dialplan functions.</para>
107                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)}
108                         and ${SPEECH_SCORE(1)}.</para>
109                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
110                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
111                         
112                 </description>
113         </application>
114         <application name="SpeechDeactivateGrammar" language="en_US">
115                 <synopsis>
116                         Deactivate a grammar.
117                 </synopsis>
118                 <syntax>
119                         <parameter name="grammar_name" required="true">
120                                 <para>The grammar name to deactivate</para>
121                         </parameter>
122                 </syntax>
123                 <description>
124                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
125                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
126                 </description>
127         </application>
128         <application name="SpeechProcessingSound" language="en_US">
129                 <synopsis>
130                         Change background processing sound.
131                 </synopsis>
132                 <syntax>
133                         <parameter name="sound_file" required="true" />
134                 </syntax>
135                 <description>
136                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
137                         processing and working to get results.</para>
138                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
139                 </description>
140         </application>
141         <application name="SpeechDestroy" language="en_US">
142                 <synopsis>
143                         End speech recognition.
144                 </synopsis>
145                 <syntax />
146                 <description>
147                         <para>This destroys the information used by all the other speech recognition applications.
148                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
149                         again before calling any other application.</para>
150                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
151                 </description>
152         </application>
153         <application name="SpeechLoadGrammar" language="en_US">
154                 <synopsis>
155                         Load a grammar.
156                 </synopsis>
157                 <syntax>
158                         <parameter name="grammar_name" required="true" />
159                         <parameter name="path" required="true" />
160                 </syntax>
161                 <description>
162                         <para>Load a grammar only on the channel, not globally.</para>
163                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
164                 </description>
165         </application>
166         <application name="SpeechUnloadGrammar" language="en_US">
167                 <synopsis>
168                         Unload a grammar.
169                 </synopsis>
170                 <syntax>
171                         <parameter name="grammar_name" required="true" />
172                 </syntax>
173                 <description>
174                         <para>Unload a grammar.</para>
175                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
176                 </description>
177         </application>
178         <function name="SPEECH_SCORE" language="en_US">
179                 <synopsis>
180                         Gets the confidence score of a result.
181                 </synopsis>
182                 <syntax argsep="/">
183                         <parameter name="nbest_number" />
184                         <parameter name="result_number" required="true" />
185                 </syntax>
186                 <description>
187                         <para>Gets the confidence score of a result.</para>
188                 </description>
189         </function>
190         <function name="SPEECH_TEXT" language="en_US">
191                 <synopsis>
192                         Gets the recognized text of a result.
193                 </synopsis>
194                 <syntax argsep="/">
195                         <parameter name="nbest_number" />
196                         <parameter name="result_number" required="true" />
197                 </syntax>
198                 <description>
199                         <para>Gets the recognized text of a result.</para>
200                 </description>
201         </function>
202         <function name="SPEECH_GRAMMAR" language="en_US">
203                 <synopsis>
204                         Gets the matched grammar of a result if available.
205                 </synopsis>
206                 <syntax argsep="/">
207                         <parameter name="nbest_number" />
208                         <parameter name="result_number" required="true" />
209                 </syntax>
210                 <description>
211                         <para>Gets the matched grammar of a result if available.</para>
212                 </description>
213         </function>
214         <function name="SPEECH_ENGINE" language="en_US">
215                 <synopsis>
216                         Get or change a speech engine specific attribute.
217                 </synopsis>
218                 <syntax>
219                         <parameter name="name" required="true" />
220                 </syntax>
221                 <description>
222                         <para>Gets or changes a speech engine specific attribute.</para>
223                 </description>
224         </function>
225         <function name="SPEECH_RESULTS_TYPE" language="en_US">
226                 <synopsis>
227                         Sets the type of results that will be returned.
228                 </synopsis>
229                 <syntax />
230                 <description>
231                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
232                 </description>
233         </function>
234         <function name="SPEECH" language="en_US">
235                 <synopsis>
236                         Gets information about speech recognition results.
237                 </synopsis>
238                 <syntax>
239                         <parameter name="argument" required="true">
240                                 <enumlist>
241                                         <enum name="status">
242                                                 <para>Returns <literal>1</literal> upon speech object existing,
243                                                 or <literal>0</literal> if not</para>
244                                         </enum>
245                                         <enum name="spoke">
246                                                 <para>Returns <literal>1</literal> if speaker spoke,
247                                                 or <literal>0</literal> if not</para>
248                                         </enum>
249                                         <enum name="results">
250                                                 <para>Returns number of results that were recognized.</para>
251                                         </enum>
252                                 </enumlist>
253                         </parameter>
254                 </syntax>
255                 <description>
256                         <para>Gets information about speech recognition results.</para>
257                 </description>
258         </function>
259  ***/
260
/*!
 * \brief Datastore destroy hook that releases the speech structure
 *
 * \param data The struct ast_speech held by the datastore; NULL is tolerated
 *             and results in no action.
 */
static void destroy_callback(void *data)
{
	struct ast_speech *speech = data;

	if (speech != NULL) {
		/* Hand the structure back to the speech API for deallocation */
		ast_speech_destroy(speech);
	}
}
275
276 /*! \brief Static structure for datastore information */
277 static const struct ast_datastore_info speech_datastore = {
278         .type = "speech",
279         .destroy = destroy_callback
280 };
281
282 /*! \brief Helper function used to find the speech structure attached to a channel */
283 static struct ast_speech *find_speech(struct ast_channel *chan)
284 {
285         struct ast_speech *speech = NULL;
286         struct ast_datastore *datastore = NULL;
287         
288         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
289         if (datastore == NULL) {
290                 return NULL;
291         }
292         speech = datastore->data;
293
294         return speech;
295 }
296
297 /* Helper function to find a specific speech recognition result by number and nbest alternative */
298 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
299 {
300         struct ast_speech_result *result = results;
301         char *tmp = NULL;
302         int nbest_num = 0, wanted_num = 0, i = 0;
303
304         if (!result) {
305                 return NULL;
306         }
307
308         if ((tmp = strchr(result_num, '/'))) {
309                 *tmp++ = '\0';
310                 nbest_num = atoi(result_num);
311                 wanted_num = atoi(tmp);
312         } else {
313                 wanted_num = atoi(result_num);
314         }
315
316         do {
317                 if (result->nbest_num != nbest_num)
318                         continue;
319                 if (i == wanted_num)
320                         break;
321                 i++;
322         } while ((result = AST_LIST_NEXT(result, list)));
323
324         return result;
325 }
326
327 /*! \brief SPEECH_SCORE() Dialplan Function */
328 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
329                        char *buf, size_t len)
330 {
331         struct ast_speech_result *result = NULL;
332         struct ast_speech *speech = find_speech(chan);
333         char tmp[128] = "";
334
335         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
336                 return -1;
337         }
338         
339         snprintf(tmp, sizeof(tmp), "%d", result->score);
340         
341         ast_copy_string(buf, tmp, len);
342
343         return 0;
344 }
345
346 static struct ast_custom_function speech_score_function = {
347         .name = "SPEECH_SCORE",
348         .read = speech_score,
349         .write = NULL,
350 };
351
352 /*! \brief SPEECH_TEXT() Dialplan Function */
353 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
354                         char *buf, size_t len)
355 {
356         struct ast_speech_result *result = NULL;
357         struct ast_speech *speech = find_speech(chan);
358
359         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
360                 return -1;
361         }
362
363         if (result->text != NULL) {
364                 ast_copy_string(buf, result->text, len);
365         } else {
366                 buf[0] = '\0';
367         }
368
369         return 0;
370 }
371
372 static struct ast_custom_function speech_text_function = {
373         .name = "SPEECH_TEXT",
374         .read = speech_text,
375         .write = NULL,
376 };
377
378 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
379 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
380                         char *buf, size_t len)
381 {
382         struct ast_speech_result *result = NULL;
383         struct ast_speech *speech = find_speech(chan);
384
385         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
386                 return -1;
387         }
388
389         if (result->grammar != NULL) {
390                 ast_copy_string(buf, result->grammar, len);
391         } else {
392                 buf[0] = '\0';
393         }
394
395         return 0;
396 }
397
398 static struct ast_custom_function speech_grammar_function = {
399         .name = "SPEECH_GRAMMAR",
400         .read = speech_grammar,
401         .write = NULL,
402 };
403
/*!
 * \brief SPEECH_ENGINE() Dialplan Set Function
 *
 * Passes the attribute name (\a data) and new \a value to ast_speech_change().
 * The result of that call is not inspected.
 *
 * \retval 0 once the change has been requested
 * \retval -1 if there is no attribute name or no speech structure
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
	struct ast_speech *speech = find_speech(chan);

	if (!data || !speech) {
		return -1;
	}

	ast_speech_change(speech, data, value);
	return 0;
}
417
/*!
 * \brief SPEECH_ENGINE() Dialplan Get Function
 *
 * Asks ast_speech_get_setting() for the attribute named by \a data and lets
 * it fill \a buf.
 *
 * \return -1 if there is no attribute name or no speech structure, otherwise
 *         whatever ast_speech_get_setting() returns.
 */
static int speech_engine_read(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t len)
{
	struct ast_speech *speech = find_speech(chan);

	if (data == NULL || speech == NULL) {
		return -1;
	}

	return ast_speech_get_setting(speech, data, buf, len);
}
429
430 static struct ast_custom_function speech_engine_function = {
431         .name = "SPEECH_ENGINE",
432         .read = speech_engine_read,
433         .write = speech_engine_write,
434 };
435
436 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
437 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
438 {
439         struct ast_speech *speech = find_speech(chan);
440
441         if (data == NULL || speech == NULL)
442                 return -1;
443
444         if (!strcasecmp(value, "normal"))
445                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
446         else if (!strcasecmp(value, "nbest"))
447                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
448
449         return 0;
450 }
451
452 static struct ast_custom_function speech_results_type_function = {
453         .name = "SPEECH_RESULTS_TYPE",
454         .read = NULL,
455         .write = speech_results_type_write,
456 };
457
458 /*! \brief SPEECH() Dialplan Function */
459 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
460                         char *buf, size_t len)
461 {
462         int results = 0;
463         struct ast_speech_result *result = NULL;
464         struct ast_speech *speech = find_speech(chan);
465         char tmp[128] = "";
466
467         /* Now go for the various options */
468         if (!strcasecmp(data, "status")) {
469                 if (speech != NULL)
470                         ast_copy_string(buf, "1", len);
471                 else
472                         ast_copy_string(buf, "0", len);
473                 return 0;
474         }
475
476         /* Make sure we have a speech structure for everything else */
477         if (speech == NULL) {
478                 return -1;
479         }
480
481         /* Check to see if they are checking for silence */
482         if (!strcasecmp(data, "spoke")) {
483                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
484                         ast_copy_string(buf, "1", len);
485                 else
486                         ast_copy_string(buf, "0", len);
487         } else if (!strcasecmp(data, "results")) {
488                 /* Count number of results */
489                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
490                         results++;
491                 snprintf(tmp, sizeof(tmp), "%d", results);
492                 ast_copy_string(buf, tmp, len);
493         } else {
494                 buf[0] = '\0';
495         }
496
497         return 0;
498 }
499
500 static struct ast_custom_function speech_function = {
501         .name = "SPEECH",
502         .read = speech_read,
503         .write = NULL,
504 };
505
506
507
508 /*! \brief SpeechCreate() Dialplan Application */
509 static int speech_create(struct ast_channel *chan, const char *data)
510 {
511         struct ast_speech *speech = NULL;
512         struct ast_datastore *datastore = NULL;
513
514         /* Request a speech object */
515         speech = ast_speech_new(data, ast_channel_nativeformats(chan));
516         if (speech == NULL) {
517                 /* Not available */
518                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
519                 return 0;
520         }
521
522         datastore = ast_datastore_alloc(&speech_datastore, NULL);
523         if (datastore == NULL) {
524                 ast_speech_destroy(speech);
525                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
526                 return 0;
527         }
528         pbx_builtin_setvar_helper(chan, "ERROR", NULL);
529         datastore->data = speech;
530         ast_channel_datastore_add(chan, datastore);
531
532         return 0;
533 }
534
535 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
536 static int speech_load(struct ast_channel *chan, const char *vdata)
537 {
538         int res = 0;
539         struct ast_speech *speech = find_speech(chan);
540         char *data;
541         AST_DECLARE_APP_ARGS(args,
542                 AST_APP_ARG(grammar);
543                 AST_APP_ARG(path);
544         );
545
546         data = ast_strdupa(vdata);
547         AST_STANDARD_APP_ARGS(args, data);
548
549         if (speech == NULL)
550                 return -1;
551
552         if (args.argc != 2)
553                 return -1;
554
555         /* Load the grammar locally on the object */
556         res = ast_speech_grammar_load(speech, args.grammar, args.path);
557
558         return res;
559 }
560
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) Dialplan Application
 *
 * \return -1 if the channel has no speech structure, otherwise the result of
 *         ast_speech_grammar_unload() for the grammar named by \a data.
 */
static int speech_unload(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_unload(speech, data);
}
575
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application
 *
 * \return -1 if the channel has no speech structure, otherwise the result of
 *         ast_speech_grammar_deactivate() for the grammar named by \a data.
 */
static int speech_deactivate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_deactivate(speech, data);
}
590
/*!
 * \brief SpeechActivateGrammar(Grammar Name) Dialplan Application
 *
 * \return -1 if the channel has no speech structure, otherwise the result of
 *         ast_speech_grammar_activate() for the grammar named by \a data.
 */
static int speech_activate(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	return ast_speech_grammar_activate(speech, data);
}
605
/*!
 * \brief SpeechStart() Dialplan Application
 *
 * Calls ast_speech_start() on the channel's speech structure. The result of
 * that call is not inspected.
 *
 * \retval 0 when a speech structure exists
 * \retval -1 when the channel has no speech structure
 */
static int speech_start(struct ast_channel *chan, const char *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (!speech) {
		return -1;
	}

	ast_speech_start(speech);
	return 0;
}
619
620 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
621 static int speech_processing_sound(struct ast_channel *chan, const char *data)
622 {
623         int res = 0;
624         struct ast_speech *speech = find_speech(chan);
625
626         if (speech == NULL)
627                 return -1;
628
629         if (speech->processing_sound != NULL) {
630                 ast_free(speech->processing_sound);
631                 speech->processing_sound = NULL;
632         }
633
634         speech->processing_sound = ast_strdup(data);
635
636         return res;
637 }
638
/*!
 * \brief Helper used by speech_background to begin playback of a sound file
 *
 * Opens \a filename on the channel in the preferred language, applies the
 * stream to the channel and starts playing it. The result of
 * ast_playstream() is not inspected.
 *
 * \retval 0 once playback has been requested
 * \retval -1 if the stream could not be opened or applied
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
	struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

	if (stream == NULL) {
		return -1;
	}

	if (ast_applystream(chan, stream) != 0) {
		return -1;
	}

	ast_playstream(stream);

	return 0;
}
654
/*! \brief Option flag bits accepted by SpeechBackground() */
enum {
	/*! Mapped to the 'n' option character */
	SB_OPT_NOANSWER = 1 << 0,
};
658
659 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
660         AST_APP_OPTION('n', SB_OPT_NOANSWER),
661 END_OPTIONS );
662
663 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
664 static int speech_background(struct ast_channel *chan, const char *data)
665 {
666         unsigned int timeout = 0;
667         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
668         struct ast_speech *speech = find_speech(chan);
669         struct ast_frame *f = NULL;
670         struct ast_format oldreadformat;
671         char dtmf[AST_MAX_EXTENSION] = "";
672         struct timeval start = { 0, 0 }, current;
673         struct ast_datastore *datastore = NULL;
674         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
675         const char *tmp2 = NULL;
676         struct ast_flags options = { 0 };
677         AST_DECLARE_APP_ARGS(args,
678                 AST_APP_ARG(soundfile);
679                 AST_APP_ARG(timeout);
680                 AST_APP_ARG(options);
681         );
682
683         parse = ast_strdupa(data);
684         AST_STANDARD_APP_ARGS(args, parse);
685
686         ast_format_clear(&oldreadformat);
687         if (speech == NULL)
688                 return -1;
689
690         if (!ast_strlen_zero(args.options)) {
691                 char *options_buf = ast_strdupa(args.options);
692                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
693         }
694
695         /* If channel is not already answered, then answer it */
696         if (ast_channel_state(chan) != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
697                 && ast_answer(chan)) {
698                         return -1;
699         }
700
701         /* Record old read format */
702         ast_format_copy(&oldreadformat, ast_channel_readformat(chan));
703
704         /* Change read format to be signed linear */
705         if (ast_set_read_format(chan, &speech->format))
706                 return -1;
707
708         if (!ast_strlen_zero(args.soundfile)) {
709                 /* Yay sound file */
710                 filename_tmp = ast_strdupa(args.soundfile);
711                 if (!ast_strlen_zero(args.timeout)) {
712                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
713                                 timeout = -1;
714                 } else
715                         timeout = 0;
716         }
717
718         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
719         ast_channel_lock(chan);
720         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
721                 max_dtmf_len = atoi(tmp2);
722         }
723         
724         /* See if a terminator is specified */
725         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
726                 if (ast_strlen_zero(tmp2))
727                         dtmf_terminator = '\0';
728                 else
729                         dtmf_terminator = tmp2[0];
730         }
731         ast_channel_unlock(chan);
732
733         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
734         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
735                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
736                 ast_speech_start(speech);
737         }
738
739         /* Ensure no streams are currently running */
740         ast_stopstream(chan);
741
742         /* Okay it's streaming so go into a loop grabbing frames! */
743         while (done == 0) {
744                 /* If the filename is null and stream is not running, start up a new sound file */
745                 if (!quieted && (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) && (filename = strsep(&filename_tmp, "&"))) {
746                         /* Discard old stream information */
747                         ast_stopstream(chan);
748                         /* Start new stream */
749                         speech_streamfile(chan, filename, ast_channel_language(chan));
750                 }
751
752                 /* Run scheduled stuff */
753                 ast_sched_runq(ast_channel_sched(chan));
754
755                 /* Yay scheduling */
756                 res = ast_sched_wait(ast_channel_sched(chan));
757                 if (res < 0)
758                         res = 1000;
759
760                 /* If there is a frame waiting, get it - if not - oh well */
761                 if (ast_waitfor(chan, res) > 0) {
762                         f = ast_read(chan);
763                         if (f == NULL) {
764                                 /* The channel has hung up most likely */
765                                 done = 3;
766                                 break;
767                         }
768                 }
769
770                 /* Do timeout check (shared between audio/dtmf) */
771                 if ((!quieted || strlen(dtmf)) && started == 1) {
772                         current = ast_tvnow();
773                         if ((ast_tvdiff_ms(current, start)) >= timeout) {
774                                 done = 1;
775                                 if (f)
776                                         ast_frfree(f);
777                                 break;
778                         }
779                 }
780
781                 /* Do checks on speech structure to see if it's changed */
782                 ast_mutex_lock(&speech->lock);
783                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
784                         if (ast_channel_stream(chan))
785                                 ast_stopstream(chan);
786                         ast_clear_flag(speech, AST_SPEECH_QUIET);
787                         quieted = 1;
788                 }
789                 /* Check state so we can see what to do */
790                 switch (speech->state) {
791                 case AST_SPEECH_STATE_READY:
792                         /* If audio playback has stopped do a check for timeout purposes */
793                         if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL)
794                                 ast_stopstream(chan);
795                         if (!quieted && ast_channel_stream(chan) == NULL && timeout && started == 0 && !filename_tmp) {
796                                 if (timeout == -1) {
797                                         done = 1;
798                                         if (f)
799                                                 ast_frfree(f);
800                                         break;
801                                 }
802                                 start = ast_tvnow();
803                                 started = 1;
804                         }
805                         /* Write audio frame out to speech engine if no DTMF has been received */
806                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
807                                 ast_speech_write(speech, f->data.ptr, f->datalen);
808                         }
809                         break;
810                 case AST_SPEECH_STATE_WAIT:
811                         /* Cue up waiting sound if not already playing */
812                         if (!strlen(dtmf)) {
813                                 if (ast_channel_stream(chan) == NULL) {
814                                         if (speech->processing_sound != NULL) {
815                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
816                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
817                                                 }
818                                         }
819                                 } else if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) {
820                                         ast_stopstream(chan);
821                                         if (speech->processing_sound != NULL) {
822                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
823                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
824                                                 }
825                                         }
826                                 }
827                         }
828                         break;
829                 case AST_SPEECH_STATE_DONE:
830                         /* Now that we are done... let's switch back to not ready state */
831                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
832                         if (!strlen(dtmf)) {
833                                 /* Copy to speech structure the results, if available */
834                                 speech->results = ast_speech_results_get(speech);
835                                 /* Break out of our background too */
836                                 done = 1;
837                                 /* Stop audio playback */
838                                 if (ast_channel_stream(chan) != NULL) {
839                                         ast_stopstream(chan);
840                                 }
841                         }
842                         break;
843                 default:
844                         break;
845                 }
846                 ast_mutex_unlock(&speech->lock);
847
848                 /* Deal with other frame types */
849                 if (f != NULL) {
850                         /* Free the frame we received */
851                         switch (f->frametype) {
852                         case AST_FRAME_DTMF:
853                                 if (dtmf_terminator != '\0' && f->subclass.integer == dtmf_terminator) {
854                                         done = 1;
855                                 } else {
856                                         quieted = 1;
857                                         if (ast_channel_stream(chan) != NULL) {
858                                                 ast_stopstream(chan);
859                                         }
860                                         if (!started) {
861                                                 /* Change timeout to be 5 seconds for DTMF input */
862                                                 timeout = (ast_channel_pbx(chan) && ast_channel_pbx(chan)->dtimeoutms) ? ast_channel_pbx(chan)->dtimeoutms : 5000;
863                                                 started = 1;
864                                         }
865                                         start = ast_tvnow();
866                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass.integer);
867                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
868                                         /* If the maximum length of the DTMF has been reached, stop now */
869                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
870                                                 done = 1;
871                                 }
872                                 break;
873                         case AST_FRAME_CONTROL:
874                                 switch (f->subclass.integer) {
875                                 case AST_CONTROL_HANGUP:
876                                         /* Since they hung up we should destroy the speech structure */
877                                         done = 3;
878                                 default:
879                                         break;
880                                 }
881                         default:
882                                 break;
883                         }
884                         ast_frfree(f);
885                         f = NULL;
886                 }
887         }
888
889         if (!ast_strlen_zero(dtmf)) {
890                 /* We sort of make a results entry */
891                 speech->results = ast_calloc(1, sizeof(*speech->results));
892                 if (speech->results != NULL) {
893                         ast_speech_dtmf(speech, dtmf);
894                         speech->results->score = 1000;
895                         speech->results->text = ast_strdup(dtmf);
896                         speech->results->grammar = ast_strdup("dtmf");
897                 }
898                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
899         }
900
901         /* See if it was because they hung up */
902         if (done == 3) {
903                 /* Destroy speech structure */
904                 ast_speech_destroy(speech);
905                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
906                 if (datastore != NULL)
907                         ast_channel_datastore_remove(chan, datastore);
908         } else {
909                 /* Channel is okay so restore read format */
910                 ast_set_read_format(chan, &oldreadformat);
911         }
912
913         return 0;
914 }
915
916
917 /*! \brief SpeechDestroy() Dialplan Application */
918 static int speech_destroy(struct ast_channel *chan, const char *data)
919 {
920         int res = 0;
921         struct ast_speech *speech = find_speech(chan);
922         struct ast_datastore *datastore = NULL;
923
924         if (speech == NULL)
925                 return -1;
926
927         /* Destroy speech structure */
928         ast_speech_destroy(speech);
929
930         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
931         if (datastore != NULL) {
932                 ast_channel_datastore_remove(chan, datastore);
933         }
934
935         return res;
936 }
937
938 static int unload_module(void)
939 {
940         int res = 0;
941
942         res = ast_unregister_application("SpeechCreate");
943         res |= ast_unregister_application("SpeechLoadGrammar");
944         res |= ast_unregister_application("SpeechUnloadGrammar");
945         res |= ast_unregister_application("SpeechActivateGrammar");
946         res |= ast_unregister_application("SpeechDeactivateGrammar");
947         res |= ast_unregister_application("SpeechStart");
948         res |= ast_unregister_application("SpeechBackground");
949         res |= ast_unregister_application("SpeechDestroy");
950         res |= ast_unregister_application("SpeechProcessingSound");
951         res |= ast_custom_function_unregister(&speech_function);
952         res |= ast_custom_function_unregister(&speech_score_function);
953         res |= ast_custom_function_unregister(&speech_text_function);
954         res |= ast_custom_function_unregister(&speech_grammar_function);
955         res |= ast_custom_function_unregister(&speech_engine_function);
956         res |= ast_custom_function_unregister(&speech_results_type_function);
957
958         return res;     
959 }
960
961 static int load_module(void)
962 {
963         int res = 0;
964
965         res = ast_register_application_xml("SpeechCreate", speech_create);
966         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
967         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
968         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
969         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
970         res |= ast_register_application_xml("SpeechStart", speech_start);
971         res |= ast_register_application_xml("SpeechBackground", speech_background);
972         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
973         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
974         res |= ast_custom_function_register(&speech_function);
975         res |= ast_custom_function_register(&speech_score_function);
976         res |= ast_custom_function_register(&speech_text_function);
977         res |= ast_custom_function_register(&speech_grammar_function);
978         res |= ast_custom_function_register(&speech_engine_function);
979         res |= ast_custom_function_register(&speech_results_type_function);
980
981         return res;
982 }
983
984 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Dialplan Speech Applications",
985                 .load = load_module,
986                 .unload = unload_module,
987                 .nonoptreq = "res_speech",
988                 );