81afa88ed0fa313907222c8725fc3477a2ace77c
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 /*** MODULEINFO
29         <support_level>core</support_level>
30         <depend>res_speech</depend>
31  ***/
32
33 #include "asterisk.h"
34
35 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
36
37 #include "asterisk/file.h"
38 #include "asterisk/channel.h"
39 #include "asterisk/pbx.h"
40 #include "asterisk/module.h"
41 #include "asterisk/lock.h"
42 #include "asterisk/app.h"
43 #include "asterisk/speech.h"
44
45 /*** DOCUMENTATION
46         <application name="SpeechCreate" language="en_US">
47                 <synopsis>
48                         Create a Speech Structure.
49                 </synopsis>
50                 <syntax>
51                         <parameter name="engine_name" required="true" />
52                 </syntax>
53                 <description>
54                         <para>This application creates information to be used by all the other applications.
55                         It must be called before doing any speech recognition activities such as activating a grammar.
56                         It takes the engine name to use as the argument, if not specified the default engine will be used.</para>
57                         <para>Sets the ERROR channel variable to 1 if the engine cannot be used.</para>
58                 </description>
59         </application>
60         <application name="SpeechActivateGrammar" language="en_US">
61                 <synopsis>
62                         Activate a grammar.
63                 </synopsis>
64                 <syntax>
65                         <parameter name="grammar_name" required="true" />
66                 </syntax>
67                 <description>
68                         <para>This activates the specified grammar to be recognized by the engine.
69                         A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
70                         in the dialplan. The grammar name is the only argument to this application.</para>
71                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
72                 </description>
73         </application>
74         <application name="SpeechStart" language="en_US">
75                 <synopsis>
76                         Start recognizing voice in the audio stream.
77                 </synopsis>
78                 <syntax />
79                 <description>
80                         <para>Tell the speech recognition engine that it should start trying to get results from audio being
81                         fed to it.</para>
82                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
83                 </description>
84         </application>
85         <application name="SpeechBackground" language="en_US">
86                 <synopsis>
87                         Play a sound file and wait for speech to be recognized.
88                 </synopsis>
89                 <syntax>
90                         <parameter name="sound_file" required="true" />
91                         <parameter name="timeout">
92                                 <para>Timeout integer in seconds. Note the timeout will only start
93                                 once the sound file has stopped playing.</para>
94                         </parameter>
95                         <parameter name="options">
96                                 <optionlist>
97                                         <option name="n">
98                                                 <para>Don't answer the channel if it has not already been answered.</para>
99                                         </option>
100                                 </optionlist>
101                         </parameter>
102                 </syntax>
103                 <description>
104                         <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
105                         of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
106                         the speech recognition engine is working. Once results are available the application returns and results
107                         (score and text) are available using dialplan functions.</para>
108                         <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)}
109                         and ${SPEECH_SCORE(1)}.</para>
110                         <para>The first argument is the sound file and the second is the timeout integer in seconds.</para>
111                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
112                         
113                 </description>
114         </application>
115         <application name="SpeechDeactivateGrammar" language="en_US">
116                 <synopsis>
117                         Deactivate a grammar.
118                 </synopsis>
119                 <syntax>
120                         <parameter name="grammar_name" required="true">
121                                 <para>The grammar name to deactivate</para>
122                         </parameter>
123                 </syntax>
124                 <description>
125                         <para>This deactivates the specified grammar so that it is no longer recognized.</para>
126                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
127                 </description>
128         </application>
129         <application name="SpeechProcessingSound" language="en_US">
130                 <synopsis>
131                         Change background processing sound.
132                 </synopsis>
133                 <syntax>
134                         <parameter name="sound_file" required="true" />
135                 </syntax>
136                 <description>
137                         <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
138                         processing and working to get results.</para>
139                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
140                 </description>
141         </application>
142         <application name="SpeechDestroy" language="en_US">
143                 <synopsis>
144                         End speech recognition.
145                 </synopsis>
146                 <syntax />
147                 <description>
148                         <para>This destroys the information used by all the other speech recognition applications.
149                         If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
150                         again before calling any other application.</para>
151                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
152                 </description>
153         </application>
154         <application name="SpeechLoadGrammar" language="en_US">
155                 <synopsis>
156                         Load a grammar.
157                 </synopsis>
158                 <syntax>
159                         <parameter name="grammar_name" required="true" />
160                         <parameter name="path" required="true" />
161                 </syntax>
162                 <description>
163                         <para>Load a grammar only on the channel, not globally.</para>
164                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
165                 </description>
166         </application>
167         <application name="SpeechUnloadGrammar" language="en_US">
168                 <synopsis>
169                         Unload a grammar.
170                 </synopsis>
171                 <syntax>
172                         <parameter name="grammar_name" required="true" />
173                 </syntax>
174                 <description>
175                         <para>Unload a grammar.</para>
176                         <para>Hangs up the channel on failure. If this is not desired, use TryExec.</para>
177                 </description>
178         </application>
179         <function name="SPEECH_SCORE" language="en_US">
180                 <synopsis>
181                         Gets the confidence score of a result.
182                 </synopsis>
183                 <syntax argsep="/">
184                         <parameter name="nbest_number" />
185                         <parameter name="result_number" required="true" />
186                 </syntax>
187                 <description>
188                         <para>Gets the confidence score of a result.</para>
189                 </description>
190         </function>
191         <function name="SPEECH_TEXT" language="en_US">
192                 <synopsis>
193                         Gets the recognized text of a result.
194                 </synopsis>
195                 <syntax argsep="/">
196                         <parameter name="nbest_number" />
197                         <parameter name="result_number" required="true" />
198                 </syntax>
199                 <description>
200                         <para>Gets the recognized text of a result.</para>
201                 </description>
202         </function>
203         <function name="SPEECH_GRAMMAR" language="en_US">
204                 <synopsis>
205                         Gets the matched grammar of a result if available.
206                 </synopsis>
207                 <syntax argsep="/">
208                         <parameter name="nbest_number" />
209                         <parameter name="result_number" required="true" />
210                 </syntax>
211                 <description>
212                         <para>Gets the matched grammar of a result if available.</para>
213                 </description>
214         </function>
215         <function name="SPEECH_ENGINE" language="en_US">
216                 <synopsis>
217                         Get or change a speech engine specific attribute.
218                 </synopsis>
219                 <syntax>
220                         <parameter name="name" required="true" />
221                 </syntax>
222                 <description>
223                         <para>Gets or changes a speech engine specific attribute.</para>
224                 </description>
225         </function>
226         <function name="SPEECH_RESULTS_TYPE" language="en_US">
227                 <synopsis>
228                         Sets the type of results that will be returned.
229                 </synopsis>
230                 <syntax />
231                 <description>
232                         <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
233                 </description>
234         </function>
235         <function name="SPEECH" language="en_US">
236                 <synopsis>
237                         Gets information about speech recognition results.
238                 </synopsis>
239                 <syntax>
240                         <parameter name="argument" required="true">
241                                 <enumlist>
242                                         <enum name="status">
243                                                 <para>Returns <literal>1</literal> upon speech object existing,
244                                                 or <literal>0</literal> if not</para>
245                                         </enum>
246                                         <enum name="spoke">
247                                                 <para>Returns <literal>1</literal> if speaker spoke,
248                                                 or <literal>0</literal> if not</para>
249                                         </enum>
250                                         <enum name="results">
251                                                 <para>Returns number of results that were recognized.</para>
252                                         </enum>
253                                 </enumlist>
254                         </parameter>
255                 </syntax>
256                 <description>
257                         <para>Gets information about speech recognition results.</para>
258                 </description>
259         </function>
260  ***/
261
262 /*! \brief Helper function used by datastores to destroy the speech structure upon hangup */
263 static void destroy_callback(void *data)
264 {
265         struct ast_speech *speech = (struct ast_speech*)data;
266
267         if (speech == NULL) {
268                 return;
269         }
270
271         /* Deallocate now */
272         ast_speech_destroy(speech);
273
274         return;
275 }
276
277 /*! \brief Static structure for datastore information */
278 static const struct ast_datastore_info speech_datastore = {
279         .type = "speech",
280         .destroy = destroy_callback
281 };
282
283 /*! \brief Helper function used to find the speech structure attached to a channel */
284 static struct ast_speech *find_speech(struct ast_channel *chan)
285 {
286         struct ast_speech *speech = NULL;
287         struct ast_datastore *datastore = NULL;
288         
289         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
290         if (datastore == NULL) {
291                 return NULL;
292         }
293         speech = datastore->data;
294
295         return speech;
296 }
297
298 /* Helper function to find a specific speech recognition result by number and nbest alternative */
299 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
300 {
301         struct ast_speech_result *result = results;
302         char *tmp = NULL;
303         int nbest_num = 0, wanted_num = 0, i = 0;
304
305         if (!result) {
306                 return NULL;
307         }
308
309         if ((tmp = strchr(result_num, '/'))) {
310                 *tmp++ = '\0';
311                 nbest_num = atoi(result_num);
312                 wanted_num = atoi(tmp);
313         } else {
314                 wanted_num = atoi(result_num);
315         }
316
317         do {
318                 if (result->nbest_num != nbest_num)
319                         continue;
320                 if (i == wanted_num)
321                         break;
322                 i++;
323         } while ((result = AST_LIST_NEXT(result, list)));
324
325         return result;
326 }
327
328 /*! \brief SPEECH_SCORE() Dialplan Function */
329 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
330                        char *buf, size_t len)
331 {
332         struct ast_speech_result *result = NULL;
333         struct ast_speech *speech = find_speech(chan);
334         char tmp[128] = "";
335
336         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
337                 return -1;
338         }
339         
340         snprintf(tmp, sizeof(tmp), "%d", result->score);
341         
342         ast_copy_string(buf, tmp, len);
343
344         return 0;
345 }
346
347 static struct ast_custom_function speech_score_function = {
348         .name = "SPEECH_SCORE",
349         .read = speech_score,
350         .write = NULL,
351 };
352
353 /*! \brief SPEECH_TEXT() Dialplan Function */
354 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
355                         char *buf, size_t len)
356 {
357         struct ast_speech_result *result = NULL;
358         struct ast_speech *speech = find_speech(chan);
359
360         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
361                 return -1;
362         }
363
364         if (result->text != NULL) {
365                 ast_copy_string(buf, result->text, len);
366         } else {
367                 buf[0] = '\0';
368         }
369
370         return 0;
371 }
372
373 static struct ast_custom_function speech_text_function = {
374         .name = "SPEECH_TEXT",
375         .read = speech_text,
376         .write = NULL,
377 };
378
379 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
380 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
381                         char *buf, size_t len)
382 {
383         struct ast_speech_result *result = NULL;
384         struct ast_speech *speech = find_speech(chan);
385
386         if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
387                 return -1;
388         }
389
390         if (result->grammar != NULL) {
391                 ast_copy_string(buf, result->grammar, len);
392         } else {
393                 buf[0] = '\0';
394         }
395
396         return 0;
397 }
398
399 static struct ast_custom_function speech_grammar_function = {
400         .name = "SPEECH_GRAMMAR",
401         .read = speech_grammar,
402         .write = NULL,
403 };
404
/*!
 * \brief SPEECH_ENGINE() Dialplan Set Function
 *
 * \param chan  Channel whose speech structure is changed
 * \param cmd   Unused
 * \param data  Name of the engine attribute
 * \param value New value for the attribute
 *
 * \retval 0 once the change has been handed to ast_speech_change()
 * \retval -1 if the attribute name or the speech structure is missing
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
        struct ast_speech *speech = find_speech(chan);

        if (!data || !speech) {
                return -1;
        }

        /* Whatever ast_speech_change() reports is not passed on to the caller */
        (void) ast_speech_change(speech, data, value);

        return 0;
}
418
/*!
 * \brief SPEECH_ENGINE() Dialplan Get Function
 *
 * \param chan Channel whose speech structure is queried
 * \param cmd  Unused
 * \param data Name of the engine attribute
 * \param buf  Output buffer passed on to ast_speech_get_setting()
 * \param len  Size of \a buf
 *
 * \return The result of ast_speech_get_setting(), or -1 if the attribute
 *         name or the speech structure is missing.
 */
static int speech_engine_read(struct ast_channel *chan, const char *cmd, char *data, char *buf, size_t len)
{
        struct ast_speech *speech = find_speech(chan);
        int res = -1;

        if (data != NULL && speech != NULL) {
                res = ast_speech_get_setting(speech, data, buf, len);
        }

        return res;
}
430
431 static struct ast_custom_function speech_engine_function = {
432         .name = "SPEECH_ENGINE",
433         .read = speech_engine_read,
434         .write = speech_engine_write,
435 };
436
437 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
438 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
439 {
440         struct ast_speech *speech = find_speech(chan);
441
442         if (data == NULL || speech == NULL)
443                 return -1;
444
445         if (!strcasecmp(value, "normal"))
446                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
447         else if (!strcasecmp(value, "nbest"))
448                 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
449
450         return 0;
451 }
452
453 static struct ast_custom_function speech_results_type_function = {
454         .name = "SPEECH_RESULTS_TYPE",
455         .read = NULL,
456         .write = speech_results_type_write,
457 };
458
459 /*! \brief SPEECH() Dialplan Function */
460 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
461                         char *buf, size_t len)
462 {
463         int results = 0;
464         struct ast_speech_result *result = NULL;
465         struct ast_speech *speech = find_speech(chan);
466         char tmp[128] = "";
467
468         /* Now go for the various options */
469         if (!strcasecmp(data, "status")) {
470                 if (speech != NULL)
471                         ast_copy_string(buf, "1", len);
472                 else
473                         ast_copy_string(buf, "0", len);
474                 return 0;
475         }
476
477         /* Make sure we have a speech structure for everything else */
478         if (speech == NULL) {
479                 return -1;
480         }
481
482         /* Check to see if they are checking for silence */
483         if (!strcasecmp(data, "spoke")) {
484                 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
485                         ast_copy_string(buf, "1", len);
486                 else
487                         ast_copy_string(buf, "0", len);
488         } else if (!strcasecmp(data, "results")) {
489                 /* Count number of results */
490                 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
491                         results++;
492                 snprintf(tmp, sizeof(tmp), "%d", results);
493                 ast_copy_string(buf, tmp, len);
494         } else {
495                 buf[0] = '\0';
496         }
497
498         return 0;
499 }
500
501 static struct ast_custom_function speech_function = {
502         .name = "SPEECH",
503         .read = speech_read,
504         .write = NULL,
505 };
506
507
508
509 /*! \brief SpeechCreate() Dialplan Application */
510 static int speech_create(struct ast_channel *chan, const char *data)
511 {
512         struct ast_speech *speech = NULL;
513         struct ast_datastore *datastore = NULL;
514
515         /* Request a speech object */
516         speech = ast_speech_new(data, ast_channel_nativeformats(chan));
517         if (speech == NULL) {
518                 /* Not available */
519                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
520                 return 0;
521         }
522
523         datastore = ast_datastore_alloc(&speech_datastore, NULL);
524         if (datastore == NULL) {
525                 ast_speech_destroy(speech);
526                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
527                 return 0;
528         }
529         pbx_builtin_setvar_helper(chan, "ERROR", NULL);
530         datastore->data = speech;
531         ast_channel_datastore_add(chan, datastore);
532
533         return 0;
534 }
535
536 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application */
537 static int speech_load(struct ast_channel *chan, const char *vdata)
538 {
539         int res = 0;
540         struct ast_speech *speech = find_speech(chan);
541         char *data;
542         AST_DECLARE_APP_ARGS(args,
543                 AST_APP_ARG(grammar);
544                 AST_APP_ARG(path);
545         );
546
547         data = ast_strdupa(vdata);
548         AST_STANDARD_APP_ARGS(args, data);
549
550         if (speech == NULL)
551                 return -1;
552
553         if (args.argc != 2)
554                 return -1;
555
556         /* Load the grammar locally on the object */
557         res = ast_speech_grammar_load(speech, args.grammar, args.path);
558
559         return res;
560 }
561
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to unload
 *
 * \return The result of ast_speech_grammar_unload(), or -1 if the channel
 *         has no speech structure.
 */
static int speech_unload(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        if (!speech) {
                return -1;
        }

        return ast_speech_grammar_unload(speech, data);
}
576
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to deactivate
 *
 * \return The result of ast_speech_grammar_deactivate(), or -1 if the
 *         channel has no speech structure.
 */
static int speech_deactivate(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        if (!speech) {
                return -1;
        }

        return ast_speech_grammar_deactivate(speech, data);
}
591
/*!
 * \brief SpeechActivateGrammar(Grammar Name) Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to activate
 *
 * \return The result of ast_speech_grammar_activate(), or -1 if the channel
 *         has no speech structure.
 */
static int speech_activate(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        if (!speech) {
                return -1;
        }

        return ast_speech_grammar_activate(speech, data);
}
606
/*!
 * \brief SpeechStart() Dialplan Application
 *
 * \param chan Channel carrying the speech structure
 * \param data Unused
 *
 * \retval 0 after ast_speech_start() has been called on the speech structure
 * \retval -1 if the channel has no speech structure
 */
static int speech_start(struct ast_channel *chan, const char *data)
{
        struct ast_speech *speech = find_speech(chan);

        if (!speech) {
                return -1;
        }

        ast_speech_start(speech);

        return 0;
}
620
621 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
622 static int speech_processing_sound(struct ast_channel *chan, const char *data)
623 {
624         int res = 0;
625         struct ast_speech *speech = find_speech(chan);
626
627         if (speech == NULL)
628                 return -1;
629
630         if (speech->processing_sound != NULL) {
631                 ast_free(speech->processing_sound);
632                 speech->processing_sound = NULL;
633         }
634
635         speech->processing_sound = ast_strdup(data);
636
637         return res;
638 }
639
/*!
 * \brief Helper function used by speech_background to playback a soundfile
 *
 * \param chan     Channel to play on
 * \param filename Sound file to open
 * \param preflang Language passed to ast_openstream()
 *
 * \retval 0 when the stream was opened, applied to the channel and started
 * \retval -1 if the stream could not be opened or applied
 */
static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
{
        struct ast_filestream *stream = ast_openstream(chan, filename, preflang);

        /* The stream is applied only if opening it succeeded */
        if (stream == NULL || ast_applystream(chan, stream)) {
                return -1;
        }

        ast_playstream(stream);

        return 0;
}
655
656 enum {
657         SB_OPT_NOANSWER = (1 << 0),
658 };
659
660 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
661         AST_APP_OPTION('n', SB_OPT_NOANSWER),
662 END_OPTIONS );
663
664 /*! \brief SpeechBackground(Sound File,Timeout) Dialplan Application */
665 static int speech_background(struct ast_channel *chan, const char *data)
666 {
667         unsigned int timeout = 0;
668         int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
669         struct ast_speech *speech = find_speech(chan);
670         struct ast_frame *f = NULL;
671         struct ast_format oldreadformat;
672         char dtmf[AST_MAX_EXTENSION] = "";
673         struct timeval start = { 0, 0 }, current;
674         struct ast_datastore *datastore = NULL;
675         char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
676         const char *tmp2 = NULL;
677         struct ast_flags options = { 0 };
678         AST_DECLARE_APP_ARGS(args,
679                 AST_APP_ARG(soundfile);
680                 AST_APP_ARG(timeout);
681                 AST_APP_ARG(options);
682         );
683
684         parse = ast_strdupa(data);
685         AST_STANDARD_APP_ARGS(args, parse);
686
687         ast_format_clear(&oldreadformat);
688         if (speech == NULL)
689                 return -1;
690
691         if (!ast_strlen_zero(args.options)) {
692                 char *options_buf = ast_strdupa(args.options);
693                 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
694         }
695
696         /* If channel is not already answered, then answer it */
697         if (ast_channel_state(chan) != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
698                 && ast_answer(chan)) {
699                         return -1;
700         }
701
702         /* Record old read format */
703         ast_format_copy(&oldreadformat, ast_channel_readformat(chan));
704
705         /* Change read format to be signed linear */
706         if (ast_set_read_format(chan, &speech->format))
707                 return -1;
708
709         if (!ast_strlen_zero(args.soundfile)) {
710                 /* Yay sound file */
711                 filename_tmp = ast_strdupa(args.soundfile);
712                 if (!ast_strlen_zero(args.timeout)) {
713                         if ((timeout = atof(args.timeout) * 1000.0) == 0)
714                                 timeout = -1;
715                 } else
716                         timeout = 0;
717         }
718
719         /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
720         ast_channel_lock(chan);
721         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
722                 max_dtmf_len = atoi(tmp2);
723         }
724         
725         /* See if a terminator is specified */
726         if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
727                 if (ast_strlen_zero(tmp2))
728                         dtmf_terminator = '\0';
729                 else
730                         dtmf_terminator = tmp2[0];
731         }
732         ast_channel_unlock(chan);
733
734         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
735         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
736                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
737                 ast_speech_start(speech);
738         }
739
740         /* Ensure no streams are currently running */
741         ast_stopstream(chan);
742
743         /* Okay it's streaming so go into a loop grabbing frames! */
744         while (done == 0) {
745                 /* If the filename is null and stream is not running, start up a new sound file */
746                 if (!quieted && (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) && (filename = strsep(&filename_tmp, "&"))) {
747                         /* Discard old stream information */
748                         ast_stopstream(chan);
749                         /* Start new stream */
750                         speech_streamfile(chan, filename, ast_channel_language(chan));
751                 }
752
753                 /* Run scheduled stuff */
754                 ast_sched_runq(ast_channel_sched(chan));
755
756                 /* Yay scheduling */
757                 res = ast_sched_wait(ast_channel_sched(chan));
758                 if (res < 0)
759                         res = 1000;
760
761                 /* If there is a frame waiting, get it - if not - oh well */
762                 if (ast_waitfor(chan, res) > 0) {
763                         f = ast_read(chan);
764                         if (f == NULL) {
765                                 /* The channel has hung up most likely */
766                                 done = 3;
767                                 break;
768                         }
769                 }
770
771                 /* Do timeout check (shared between audio/dtmf) */
772                 if ((!quieted || strlen(dtmf)) && started == 1) {
773                         current = ast_tvnow();
774                         if ((ast_tvdiff_ms(current, start)) >= timeout) {
775                                 done = 1;
776                                 if (f)
777                                         ast_frfree(f);
778                                 break;
779                         }
780                 }
781
782                 /* Do checks on speech structure to see if it's changed */
783                 ast_mutex_lock(&speech->lock);
784                 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
785                         if (ast_channel_stream(chan))
786                                 ast_stopstream(chan);
787                         ast_clear_flag(speech, AST_SPEECH_QUIET);
788                         quieted = 1;
789                 }
790                 /* Check state so we can see what to do */
791                 switch (speech->state) {
792                 case AST_SPEECH_STATE_READY:
793                         /* If audio playback has stopped do a check for timeout purposes */
794                         if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL)
795                                 ast_stopstream(chan);
796                         if (!quieted && ast_channel_stream(chan) == NULL && timeout && started == 0 && !filename_tmp) {
797                                 if (timeout == -1) {
798                                         done = 1;
799                                         if (f)
800                                                 ast_frfree(f);
801                                         break;
802                                 }
803                                 start = ast_tvnow();
804                                 started = 1;
805                         }
806                         /* Write audio frame out to speech engine if no DTMF has been received */
807                         if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
808                                 ast_speech_write(speech, f->data.ptr, f->datalen);
809                         }
810                         break;
811                 case AST_SPEECH_STATE_WAIT:
812                         /* Cue up waiting sound if not already playing */
813                         if (!strlen(dtmf)) {
814                                 if (ast_channel_stream(chan) == NULL) {
815                                         if (speech->processing_sound != NULL) {
816                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
817                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
818                                                 }
819                                         }
820                                 } else if (ast_channel_streamid(chan) == -1 && ast_channel_timingfunc(chan) == NULL) {
821                                         ast_stopstream(chan);
822                                         if (speech->processing_sound != NULL) {
823                                                 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
824                                                         speech_streamfile(chan, speech->processing_sound, ast_channel_language(chan));
825                                                 }
826                                         }
827                                 }
828                         }
829                         break;
830                 case AST_SPEECH_STATE_DONE:
831                         /* Now that we are done... let's switch back to not ready state */
832                         ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
833                         if (!strlen(dtmf)) {
834                                 /* Copy to speech structure the results, if available */
835                                 speech->results = ast_speech_results_get(speech);
836                                 /* Break out of our background too */
837                                 done = 1;
838                                 /* Stop audio playback */
839                                 if (ast_channel_stream(chan) != NULL) {
840                                         ast_stopstream(chan);
841                                 }
842                         }
843                         break;
844                 default:
845                         break;
846                 }
847                 ast_mutex_unlock(&speech->lock);
848
849                 /* Deal with other frame types */
850                 if (f != NULL) {
851                         /* Free the frame we received */
852                         switch (f->frametype) {
853                         case AST_FRAME_DTMF:
854                                 if (dtmf_terminator != '\0' && f->subclass.integer == dtmf_terminator) {
855                                         done = 1;
856                                 } else {
857                                         quieted = 1;
858                                         if (ast_channel_stream(chan) != NULL) {
859                                                 ast_stopstream(chan);
860                                         }
861                                         if (!started) {
862                                                 /* Change timeout to be 5 seconds for DTMF input */
863                                                 timeout = (ast_channel_pbx(chan) && ast_channel_pbx(chan)->dtimeoutms) ? ast_channel_pbx(chan)->dtimeoutms : 5000;
864                                                 started = 1;
865                                         }
866                                         start = ast_tvnow();
867                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass.integer);
868                                         strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
869                                         /* If the maximum length of the DTMF has been reached, stop now */
870                                         if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
871                                                 done = 1;
872                                 }
873                                 break;
874                         case AST_FRAME_CONTROL:
875                                 switch (f->subclass.integer) {
876                                 case AST_CONTROL_HANGUP:
877                                         /* Since they hung up we should destroy the speech structure */
878                                         done = 3;
879                                 default:
880                                         break;
881                                 }
882                         default:
883                                 break;
884                         }
885                         ast_frfree(f);
886                         f = NULL;
887                 }
888         }
889
890         if (!ast_strlen_zero(dtmf)) {
891                 /* We sort of make a results entry */
892                 speech->results = ast_calloc(1, sizeof(*speech->results));
893                 if (speech->results != NULL) {
894                         ast_speech_dtmf(speech, dtmf);
895                         speech->results->score = 1000;
896                         speech->results->text = ast_strdup(dtmf);
897                         speech->results->grammar = ast_strdup("dtmf");
898                 }
899                 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
900         }
901
902         /* See if it was because they hung up */
903         if (done == 3) {
904                 /* Destroy speech structure */
905                 ast_speech_destroy(speech);
906                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
907                 if (datastore != NULL)
908                         ast_channel_datastore_remove(chan, datastore);
909         } else {
910                 /* Channel is okay so restore read format */
911                 ast_set_read_format(chan, &oldreadformat);
912         }
913
914         return 0;
915 }
916
917
918 /*! \brief SpeechDestroy() Dialplan Application */
919 static int speech_destroy(struct ast_channel *chan, const char *data)
920 {
921         int res = 0;
922         struct ast_speech *speech = find_speech(chan);
923         struct ast_datastore *datastore = NULL;
924
925         if (speech == NULL)
926                 return -1;
927
928         /* Destroy speech structure */
929         ast_speech_destroy(speech);
930
931         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
932         if (datastore != NULL) {
933                 ast_channel_datastore_remove(chan, datastore);
934         }
935
936         return res;
937 }
938
939 static int unload_module(void)
940 {
941         int res = 0;
942
943         res = ast_unregister_application("SpeechCreate");
944         res |= ast_unregister_application("SpeechLoadGrammar");
945         res |= ast_unregister_application("SpeechUnloadGrammar");
946         res |= ast_unregister_application("SpeechActivateGrammar");
947         res |= ast_unregister_application("SpeechDeactivateGrammar");
948         res |= ast_unregister_application("SpeechStart");
949         res |= ast_unregister_application("SpeechBackground");
950         res |= ast_unregister_application("SpeechDestroy");
951         res |= ast_unregister_application("SpeechProcessingSound");
952         res |= ast_custom_function_unregister(&speech_function);
953         res |= ast_custom_function_unregister(&speech_score_function);
954         res |= ast_custom_function_unregister(&speech_text_function);
955         res |= ast_custom_function_unregister(&speech_grammar_function);
956         res |= ast_custom_function_unregister(&speech_engine_function);
957         res |= ast_custom_function_unregister(&speech_results_type_function);
958
959         return res;     
960 }
961
962 static int load_module(void)
963 {
964         int res = 0;
965
966         res = ast_register_application_xml("SpeechCreate", speech_create);
967         res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
968         res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
969         res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
970         res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
971         res |= ast_register_application_xml("SpeechStart", speech_start);
972         res |= ast_register_application_xml("SpeechBackground", speech_background);
973         res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
974         res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
975         res |= ast_custom_function_register(&speech_function);
976         res |= ast_custom_function_register(&speech_score_function);
977         res |= ast_custom_function_register(&speech_text_function);
978         res |= ast_custom_function_register(&speech_grammar_function);
979         res |= ast_custom_function_register(&speech_engine_function);
980         res |= ast_custom_function_register(&speech_results_type_function);
981
982         return res;
983 }
984
985 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Dialplan Speech Applications",
986                 .load = load_module,
987                 .unload = unload_module,
988                 .nonoptreq = "res_speech",
989                 );