2 * Asterisk -- An open source telephony toolkit.
4 * Copyright (C) 2006, Digium, Inc.
6 * Joshua Colp <jcolp@digium.com>
8 * See http://www.asterisk.org for more information about
9 * the Asterisk project. Please do not directly contact
10 * any of the maintainers of this project for assistance;
11 * the project provides a web site, mailing lists and IRC
12 * channels for your use.
14 * This program is free software, distributed under the terms of
15 * the GNU General Public License Version 2. See the LICENSE file
16 * at the top of the source tree.
21 * \brief Speech Recognition Utility Applications
23 * \author Joshua Colp <jcolp@digium.com>
25 * \ingroup applications
30 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
32 #include "asterisk/file.h"
33 #include "asterisk/channel.h"
34 #include "asterisk/pbx.h"
35 #include "asterisk/module.h"
36 #include "asterisk/lock.h"
37 #include "asterisk/app.h"
38 #include "asterisk/speech.h"
41 <application name="SpeechCreate" language="en_US">
43 Create a Speech Structure.
46 <parameter name="engine_name" required="true" />
49 <para>This application creates information to be used by all the other applications.
50 It must be called before doing any speech recognition activities such as activating a grammar.
51 It takes the engine name to use as the argument; if not specified, the default engine will be used.</para>
54 <application name="SpeechActivateGrammar" language="en_US">
59 <parameter name="grammar_name" required="true" />
62 <para>This activates the specified grammar to be recognized by the engine.
63 A grammar tells the speech recognition engine what to recognize, and how to portray it back to you
64 in the dialplan. The grammar name is the only argument to this application.</para>
67 <application name="SpeechStart" language="en_US">
69 Start recognizing voice in the audio stream.
73 <para>Tell the speech recognition engine that it should start trying to get results from audio being
77 <application name="SpeechBackground" language="en_US">
79 Play a sound file and wait for speech to be recognized.
82 <parameter name="sound_file" required="true" />
83 <parameter name="timeout">
84 <para>Timeout integer in seconds. Note the timeout will only start
85 once the sound file has stopped playing.</para>
87 <parameter name="options">
90 <para>Don't answer the channel if it has not already been answered.</para>
96 <para>This application plays a sound file and waits for the person to speak. Once they start speaking playback
97 of the file stops, and silence is heard. Once they stop talking the processing sound is played to indicate
98 the speech recognition engine is working. Once results are available the application returns and results
99 (score and text) are available using dialplan functions.</para>
100 <para>The first text and score are ${SPEECH_TEXT(0)} and ${SPEECH_SCORE(0)} while the second are ${SPEECH_TEXT(1)}
101 and ${SPEECH_SCORE(1)}.</para>
102 <para>The first argument is the sound file, the second is the timeout integer in seconds, and the optional third argument is the options string.</para>
106 <application name="SpeechDeactivateGrammar" language="en_US">
108 Deactivate a grammar.
111 <parameter name="grammar_name" required="true">
112 <para>The grammar name to deactivate</para>
116 <para>This deactivates the specified grammar so that it is no longer recognized.</para>
119 <application name="SpeechProcessingSound" language="en_US">
121 Change background processing sound.
124 <parameter name="sound_file" required="true" />
127 <para>This changes the processing sound that SpeechBackground plays back when the speech recognition engine is
128 processing and working to get results.</para>
131 <application name="SpeechDestroy" language="en_US">
133 End speech recognition.
137 <para>This destroys the information used by all the other speech recognition applications.
138 If you call this application but end up wanting to recognize more speech, you must call SpeechCreate()
139 again before calling any other application.</para>
142 <application name="SpeechLoadGrammar" language="en_US">
147 <parameter name="grammar_name" required="true" />
148 <parameter name="path" required="true" />
151 <para>Load a grammar only on the channel, not globally.</para>
154 <application name="SpeechUnloadGrammar" language="en_US">
159 <parameter name="grammar_name" required="true" />
162 <para>Unload a grammar.</para>
165 <function name="SPEECH_SCORE" language="en_US">
167 Gets the confidence score of a result.
170 <parameter name="nbest_number" />
171 <parameter name="result_number" required="true" />
174 <para>Gets the confidence score of a result.</para>
177 <function name="SPEECH_TEXT" language="en_US">
179 Gets the recognized text of a result.
182 <parameter name="nbest_number" />
183 <parameter name="result_number" required="true" />
186 <para>Gets the recognized text of a result.</para>
189 <function name="SPEECH_GRAMMAR" language="en_US">
191 Gets the matched grammar of a result if available.
194 <parameter name="nbest_number" />
195 <parameter name="result_number" required="true" />
198 <para>Gets the matched grammar of a result if available.</para>
201 <function name="SPEECH_ENGINE" language="en_US">
203 Change a speech engine specific attribute.
206 <parameter name="name" required="true" />
209 <para>Changes a speech engine specific attribute.</para>
212 <function name="SPEECH_RESULTS_TYPE" language="en_US">
214 Sets the type of results that will be returned.
218 <para>Sets the type of results that will be returned. Valid options are normal or nbest.</para>
221 <function name="SPEECH" language="en_US">
223 Gets information about speech recognition results.
226 <parameter name="argument" required="true">
229 <para>Returns <literal>1</literal> upon speech object existing,
230 or <literal>0</literal> if not</para>
233 <para>Returns <literal>1</literal> if the speaker spoke,
234 or <literal>0</literal> if not</para>
236 <enum name="results">
237 <para>Returns number of results that were recognized.</para>
243 <para>Gets information about speech recognition results.</para>
/*!
 * \brief Destroy hook installed in speech_datastore
 * \param data The struct ast_speech stored in the datastore (may be NULL)
 *
 * Hands the stored speech structure to ast_speech_destroy(); a NULL
 * pointer is ignored.
 */
static void destroy_callback(void *data)
{
	struct ast_speech *doomed = data;

	/* Nothing was attached, so there is nothing to release */
	if (doomed != NULL) {
		ast_speech_destroy(doomed);
	}
}
263 /*! \brief Datastore descriptor used to attach a speech structure to a channel; destroy_callback is installed as its destroy handler */
264 static const struct ast_datastore_info speech_datastore = {
266 .destroy = destroy_callback
269 /*! \brief Helper function used to find the speech structure attached to a channel */
270 static struct ast_speech *find_speech(struct ast_channel *chan)
272 struct ast_speech *speech = NULL;
273 struct ast_datastore *datastore = NULL;
275 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
276 if (datastore == NULL) {
279 speech = datastore->data;
284 /* Helper function to find a specific speech recognition result by number and nbest alternative */
285 static struct ast_speech_result *find_result(struct ast_speech_result *results, char *result_num)
287 struct ast_speech_result *result = results;
289 int nbest_num = 0, wanted_num = 0, i = 0;
295 if ((tmp = strchr(result_num, '/'))) {
297 nbest_num = atoi(result_num);
298 wanted_num = atoi(tmp);
300 wanted_num = atoi(result_num);
304 if (result->nbest_num != nbest_num)
309 } while ((result = AST_LIST_NEXT(result, list)));
314 /*! \brief SPEECH_SCORE() Dialplan Function */
315 static int speech_score(struct ast_channel *chan, const char *cmd, char *data,
316 char *buf, size_t len)
318 struct ast_speech_result *result = NULL;
319 struct ast_speech *speech = find_speech(chan);
322 if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
326 snprintf(tmp, sizeof(tmp), "%d", result->score);
328 ast_copy_string(buf, tmp, len);
/* Registration record for the SPEECH_SCORE() dialplan function; reads are served by speech_score(). */
333 static struct ast_custom_function speech_score_function = {
334 .name = "SPEECH_SCORE",
335 .read = speech_score,
339 /*! \brief SPEECH_TEXT() Dialplan Function */
340 static int speech_text(struct ast_channel *chan, const char *cmd, char *data,
341 char *buf, size_t len)
343 struct ast_speech_result *result = NULL;
344 struct ast_speech *speech = find_speech(chan);
346 if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
350 if (result->text != NULL) {
351 ast_copy_string(buf, result->text, len);
/* Registration record for the SPEECH_TEXT() dialplan function (remaining initializer fields are not shown in this listing). */
359 static struct ast_custom_function speech_text_function = {
360 .name = "SPEECH_TEXT",
365 /*! \brief SPEECH_GRAMMAR() Dialplan Function */
366 static int speech_grammar(struct ast_channel *chan, const char *cmd, char *data,
367 char *buf, size_t len)
369 struct ast_speech_result *result = NULL;
370 struct ast_speech *speech = find_speech(chan);
372 if (data == NULL || speech == NULL || !(result = find_result(speech->results, data))) {
376 if (result->grammar != NULL) {
377 ast_copy_string(buf, result->grammar, len);
/* Registration record for the SPEECH_GRAMMAR() dialplan function; reads are served by speech_grammar(). */
385 static struct ast_custom_function speech_grammar_function = {
386 .name = "SPEECH_GRAMMAR",
387 .read = speech_grammar,
/*!
 * \brief SPEECH_ENGINE() dialplan function: set an engine specific attribute
 * \param chan  Channel carrying the speech structure
 * \param cmd   Function name (unused)
 * \param data  Attribute name
 * \param value New attribute value
 * \retval 0  the change was passed to ast_speech_change() (its result is not inspected)
 * \retval -1 no attribute name or no speech structure on the channel
 */
static int speech_engine_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
{
	struct ast_speech *speech = find_speech(chan);

	if (speech != NULL && data != NULL) {
		ast_speech_change(speech, data, value);
		return 0;
	}

	return -1;
}
/* Registration record for the SPEECH_ENGINE() dialplan function; writes are served by speech_engine_write(). */
405 static struct ast_custom_function speech_engine_function = {
406 .name = "SPEECH_ENGINE",
408 .write = speech_engine_write,
411 /*! \brief SPEECH_RESULTS_TYPE() Dialplan Function */
412 static int speech_results_type_write(struct ast_channel *chan, const char *cmd, char *data, const char *value)
414 struct ast_speech *speech = find_speech(chan);
416 if (data == NULL || speech == NULL)
419 if (!strcasecmp(value, "normal"))
420 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NORMAL);
421 else if (!strcasecmp(value, "nbest"))
422 ast_speech_change_results_type(speech, AST_SPEECH_RESULTS_TYPE_NBEST);
/* Registration record for the SPEECH_RESULTS_TYPE() dialplan function; writes are served by speech_results_type_write(). */
427 static struct ast_custom_function speech_results_type_function = {
428 .name = "SPEECH_RESULTS_TYPE",
430 .write = speech_results_type_write,
433 /*! \brief SPEECH() Dialplan Function */
434 static int speech_read(struct ast_channel *chan, const char *cmd, char *data,
435 char *buf, size_t len)
438 struct ast_speech_result *result = NULL;
439 struct ast_speech *speech = find_speech(chan);
442 /* Now go for the various options */
443 if (!strcasecmp(data, "status")) {
445 ast_copy_string(buf, "1", len);
447 ast_copy_string(buf, "0", len);
451 /* Make sure we have a speech structure for everything else */
452 if (speech == NULL) {
456 /* Check to see if they are checking for silence */
457 if (!strcasecmp(data, "spoke")) {
458 if (ast_test_flag(speech, AST_SPEECH_SPOKE))
459 ast_copy_string(buf, "1", len);
461 ast_copy_string(buf, "0", len);
462 } else if (!strcasecmp(data, "results")) {
463 /* Count number of results */
464 for (result = speech->results; result; result = AST_LIST_NEXT(result, list))
466 snprintf(tmp, sizeof(tmp), "%d", results);
467 ast_copy_string(buf, tmp, len);
/* Registration record passed to ast_custom_function_register()/unregister() by load_module()/unload_module(); its initializer fields are not shown in this listing. */
475 static struct ast_custom_function speech_function = {
483 /*! \brief SpeechCreate() Dialplan Application */
484 static int speech_create(struct ast_channel *chan, void *data)
486 struct ast_speech *speech = NULL;
487 struct ast_datastore *datastore = NULL;
489 /* Request a speech object */
490 speech = ast_speech_new(data, chan->nativeformats);
491 if (speech == NULL) {
493 pbx_builtin_setvar_helper(chan, "ERROR", "1");
497 datastore = ast_datastore_alloc(&speech_datastore, NULL);
498 if (datastore == NULL) {
499 ast_speech_destroy(speech);
500 pbx_builtin_setvar_helper(chan, "ERROR", "1");
503 pbx_builtin_setvar_helper(chan, "ERROR", NULL);
504 datastore->data = speech;
505 ast_channel_datastore_add(chan, datastore);
510 /*! \brief SpeechLoadGrammar(Grammar Name,Path) Dialplan Application: loads a grammar on this channel's speech structure only */
511 static int speech_load(struct ast_channel *chan, void *vdata)
514 struct ast_speech *speech = find_speech(chan);
/* Parsed application arguments; args.grammar and args.path are consumed below */
516 AST_DECLARE_APP_ARGS(args,
517 AST_APP_ARG(grammar);
/* Work on a stack copy so argument parsing does not modify the caller's string */
521 data = ast_strdupa(vdata);
522 AST_STANDARD_APP_ARGS(args, data);
530 /* Load the grammar locally on the object */
531 res = ast_speech_grammar_load(speech, args.grammar, args.path);
/*!
 * \brief SpeechUnloadGrammar(Grammar Name) dialplan application
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to unload
 * \return Result of ast_speech_grammar_unload(), or -1 when the channel has
 *         no speech structure
 */
static int speech_unload(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (speech == NULL) {
		return -1;
	}

	/* Unload the grammar */
	return ast_speech_grammar_unload(speech, data);
}
/*!
 * \brief SpeechDeactivateGrammar(Grammar Name) dialplan application
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to deactivate
 * \return Result of ast_speech_grammar_deactivate(), or -1 when the channel
 *         has no speech structure
 */
static int speech_deactivate(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (speech == NULL) {
		return -1;
	}

	/* Deactivate the grammar on the speech object */
	return ast_speech_grammar_deactivate(speech, data);
}
/*!
 * \brief SpeechActivateGrammar(Grammar Name) dialplan application
 * \param chan Channel carrying the speech structure
 * \param data Name of the grammar to activate
 * \return Result of ast_speech_grammar_activate(), or -1 when the channel
 *         has no speech structure
 */
static int speech_activate(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (speech == NULL) {
		return -1;
	}

	/* Activate the grammar on the speech object */
	return ast_speech_grammar_activate(speech, data);
}
/*!
 * \brief SpeechStart() dialplan application
 * \param chan Channel carrying the speech structure
 * \param data Unused
 * \retval 0  ast_speech_start() was invoked (its result is not inspected)
 * \retval -1 the channel has no speech structure
 */
static int speech_start(struct ast_channel *chan, void *data)
{
	struct ast_speech *speech = find_speech(chan);

	if (speech == NULL) {
		return -1;
	}

	ast_speech_start(speech);

	return 0;
}
595 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
596 static int speech_processing_sound(struct ast_channel *chan, void *data)
599 struct ast_speech *speech = find_speech(chan);
604 if (speech->processing_sound != NULL) {
605 ast_free(speech->processing_sound);
606 speech->processing_sound = NULL;
609 speech->processing_sound = ast_strdup(data);
614 /*! \brief Helper function used by speech_background to play back a sound file on the channel */
615 static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
617 struct ast_filestream *fs = NULL;
/* Open the named file for this channel in the preferred language; the failure branch is not shown in this listing */
619 if (!(fs = ast_openstream(chan, filename, preflang)))
/* Attach the opened stream to the channel; a non-zero result is treated as failure */
622 if (ast_applystream(chan, fs))
/* Option flag for SpeechBackground(): do not answer the channel */
631 SB_OPT_NOANSWER = (1 << 0),
/* Option table for SpeechBackground(): the letter 'n' selects SB_OPT_NOANSWER */
634 AST_APP_OPTIONS(speech_background_options, BEGIN_OPTIONS
635 AST_APP_OPTION('n', SB_OPT_NOANSWER),
638 /*! \brief SpeechBackground(Sound File,Timeout,Options) Dialplan Application: plays the sound file(s) while feeding audio to the speech engine and collecting DTMF, until results are available, the timeout expires or the channel hangs up */
639 static int speech_background(struct ast_channel *chan, void *data)
/* Timeout in milliseconds (the seconds argument is multiplied by 1000 below) */
641 unsigned int timeout = 0;
642 int res = 0, done = 0, started = 0, quieted = 0, max_dtmf_len = 0;
643 struct ast_speech *speech = find_speech(chan);
644 struct ast_frame *f = NULL;
645 int oldreadformat = AST_FORMAT_SLINEAR;
/* DTMF digits collected so far */
646 char dtmf[AST_MAX_EXTENSION] = "";
647 struct timeval start = { 0, 0 }, current;
648 struct ast_datastore *datastore = NULL;
/* filename_tmp holds the remaining '&'-separated sound files; '#' is the default DTMF terminator */
649 char *parse, *filename_tmp = NULL, *filename = NULL, tmp[2] = "", dtmf_terminator = '#';
650 const char *tmp2 = NULL;
651 struct ast_flags options = { 0 };
652 AST_DECLARE_APP_ARGS(args,
653 AST_APP_ARG(soundfile);
654 AST_APP_ARG(timeout);
655 AST_APP_ARG(options);
/* Parse a stack copy of the argument string */
658 parse = ast_strdupa(data);
659 AST_STANDARD_APP_ARGS(args, parse);
664 if (!ast_strlen_zero(args.options)) {
665 char *options_buf = ast_strdupa(args.options);
666 ast_app_parse_options(speech_background_options, &options, NULL, options_buf);
669 /* If channel is not already answered, then answer it - unless the 'n' (SB_OPT_NOANSWER) option was given */
670 if (chan->_state != AST_STATE_UP && !ast_test_flag(&options, SB_OPT_NOANSWER)
671 && ast_answer(chan)) {
675 /* Record old read format */
676 oldreadformat = chan->readformat;
678 /* Change read format to the format stored in the speech structure (speech->format) */
679 if (ast_set_read_format(chan, speech->format))
682 if (!ast_strlen_zero(args.soundfile)) {
684 filename_tmp = ast_strdupa(args.soundfile);
685 if (!ast_strlen_zero(args.timeout)) {
/* The timeout argument is given in seconds (fractions allowed) and converted to milliseconds */
686 if ((timeout = atof(args.timeout) * 1000.0) == 0)
692 /* See if the maximum DTMF length variable is set... we use a variable in case they want to carry it through their entire dialplan */
693 ast_channel_lock(chan);
694 if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_MAXLEN")) && !ast_strlen_zero(tmp2)) {
695 max_dtmf_len = atoi(tmp2);
698 /* See if a terminator is specified; an empty value disables the terminator */
699 if ((tmp2 = pbx_builtin_getvar_helper(chan, "SPEECH_DTMF_TERMINATOR"))) {
700 if (ast_strlen_zero(tmp2))
701 dtmf_terminator = '\0';
703 dtmf_terminator = tmp2[0];
705 ast_channel_unlock(chan);
707 /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
708 if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
709 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
710 ast_speech_start(speech);
713 /* Ensure no streams are currently running */
714 ast_stopstream(chan);
716 /* Okay it's streaming so go into a loop grabbing frames! */
718 /* If playback has not been quieted and no stream is running, take the next '&'-separated file name and start it */
719 if (!quieted && (chan->streamid == -1 && chan->timingfunc == NULL) && (filename = strsep(&filename_tmp, "&"))) {
720 /* Discard old stream information */
721 ast_stopstream(chan);
722 /* Start new stream */
723 speech_streamfile(chan, filename, chan->language);
726 /* Run scheduled stuff */
727 ast_sched_runq(chan->sched);
730 res = ast_sched_wait(chan->sched);
734 /* If there is a frame waiting, get it - if not - oh well */
735 if (ast_waitfor(chan, res) > 0) {
738 /* The channel has hung up most likely */
744 /* Do timeout check (shared between audio/dtmf) */
745 if ((!quieted || strlen(dtmf)) && started == 1) {
746 current = ast_tvnow();
747 if ((ast_tvdiff_ms(current, start)) >= timeout) {
755 /* Do checks on speech structure to see if it's changed */
756 ast_mutex_lock(&speech->lock);
/* The engine asked for quiet: stop playback and acknowledge by clearing the flag */
757 if (ast_test_flag(speech, AST_SPEECH_QUIET)) {
759 ast_stopstream(chan);
760 ast_clear_flag(speech, AST_SPEECH_QUIET);
763 /* Check state so we can see what to do */
764 switch (speech->state) {
765 case AST_SPEECH_STATE_READY:
766 /* If audio playback has stopped do a check for timeout purposes */
767 if (chan->streamid == -1 && chan->timingfunc == NULL)
768 ast_stopstream(chan);
/* All prompt files have been consumed (filename_tmp is NULL) and nothing is playing */
769 if (!quieted && chan->stream == NULL && timeout && started == 0 && !filename_tmp) {
779 /* Write audio frame out to speech engine if no DTMF has been received */
780 if (!strlen(dtmf) && f != NULL && f->frametype == AST_FRAME_VOICE) {
781 ast_speech_write(speech, f->data.ptr, f->datalen);
784 case AST_SPEECH_STATE_WAIT:
785 /* Cue up waiting sound if not already playing; an empty name or "none" disables it */
787 if (chan->stream == NULL) {
788 if (speech->processing_sound != NULL) {
789 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
790 speech_streamfile(chan, speech->processing_sound, chan->language);
/* A stream object exists but is no longer running: stop it and start the processing sound again */
793 } else if (chan->streamid == -1 && chan->timingfunc == NULL) {
794 ast_stopstream(chan);
795 if (speech->processing_sound != NULL) {
796 if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound, "none")) {
797 speech_streamfile(chan, speech->processing_sound, chan->language);
803 case AST_SPEECH_STATE_DONE:
804 /* Now that we are done... let's switch back to not ready state */
805 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
807 /* Copy to speech structure the results, if available */
808 speech->results = ast_speech_results_get(speech);
809 /* Break out of our background too */
811 /* Stop audio playback */
812 if (chan->stream != NULL) {
813 ast_stopstream(chan);
820 ast_mutex_unlock(&speech->lock);
822 /* Deal with other frame types */
824 /* Free the frame we received */
825 switch (f->frametype) {
/* A digit equal to the configured terminator is handled separately from ordinary digits */
827 if (dtmf_terminator != '\0' && f->subclass == dtmf_terminator) {
830 if (chan->stream != NULL) {
831 ast_stopstream(chan);
834 /* Change timeout for DTMF input: the dialplan digit timeout (dtimeoutms) when set, otherwise 5000 ms */
835 timeout = (chan->pbx && chan->pbx->dtimeoutms) ? chan->pbx->dtimeoutms : 5000;
/* Append the received digit to the collected DTMF string */
839 snprintf(tmp, sizeof(tmp), "%c", f->subclass);
840 strncat(dtmf, tmp, sizeof(dtmf) - strlen(dtmf) - 1);
841 /* If the maximum length of the DTMF has been reached, stop now */
842 if (max_dtmf_len && strlen(dtmf) == max_dtmf_len)
846 case AST_FRAME_CONTROL:
847 switch (f->subclass) {
848 case AST_CONTROL_HANGUP:
849 /* Since they hung up we should destroy the speech structure */
862 if (!ast_strlen_zero(dtmf)) {
863 /* We sort of make a results entry: score 1000, text set to the digits, grammar "dtmf" */
/* NOTE(review): speech->results is overwritten here; whether an earlier result list is released first is not visible in this listing - confirm */
864 speech->results = ast_calloc(1, sizeof(*speech->results));
865 if (speech->results != NULL) {
866 ast_speech_dtmf(speech, dtmf);
867 speech->results->score = 1000;
868 speech->results->text = ast_strdup(dtmf);
869 speech->results->grammar = ast_strdup("dtmf");
871 ast_speech_change_state(speech, AST_SPEECH_STATE_NOT_READY);
874 /* See if it was because they hung up */
876 /* Destroy speech structure */
/* NOTE(review): the datastore is removed from the channel after the speech structure is destroyed, but no call freeing the datastore itself is visible here - confirm it is not leaked */
877 ast_speech_destroy(speech);
878 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
879 if (datastore != NULL)
880 ast_channel_datastore_remove(chan, datastore);
882 /* Channel is okay so restore read format */
883 ast_set_read_format(chan, oldreadformat);
890 /*! \brief SpeechDestroy() Dialplan Application */
891 static int speech_destroy(struct ast_channel *chan, void *data)
894 struct ast_speech *speech = find_speech(chan);
895 struct ast_datastore *datastore = NULL;
900 /* Destroy speech structure */
901 ast_speech_destroy(speech);
903 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
904 if (datastore != NULL) {
905 ast_channel_datastore_remove(chan, datastore);
911 static int unload_module(void)
915 res = ast_unregister_application("SpeechCreate");
916 res |= ast_unregister_application("SpeechLoadGrammar");
917 res |= ast_unregister_application("SpeechUnloadGrammar");
918 res |= ast_unregister_application("SpeechActivateGrammar");
919 res |= ast_unregister_application("SpeechDeactivateGrammar");
920 res |= ast_unregister_application("SpeechStart");
921 res |= ast_unregister_application("SpeechBackground");
922 res |= ast_unregister_application("SpeechDestroy");
923 res |= ast_unregister_application("SpeechProcessingSound");
924 res |= ast_custom_function_unregister(&speech_function);
925 res |= ast_custom_function_unregister(&speech_score_function);
926 res |= ast_custom_function_unregister(&speech_text_function);
927 res |= ast_custom_function_unregister(&speech_grammar_function);
928 res |= ast_custom_function_unregister(&speech_engine_function);
929 res |= ast_custom_function_unregister(&speech_results_type_function);
934 static int load_module(void)
938 res = ast_register_application_xml("SpeechCreate", speech_create);
939 res |= ast_register_application_xml("SpeechLoadGrammar", speech_load);
940 res |= ast_register_application_xml("SpeechUnloadGrammar", speech_unload);
941 res |= ast_register_application_xml("SpeechActivateGrammar", speech_activate);
942 res |= ast_register_application_xml("SpeechDeactivateGrammar", speech_deactivate);
943 res |= ast_register_application_xml("SpeechStart", speech_start);
944 res |= ast_register_application_xml("SpeechBackground", speech_background);
945 res |= ast_register_application_xml("SpeechDestroy", speech_destroy);
946 res |= ast_register_application_xml("SpeechProcessingSound", speech_processing_sound);
947 res |= ast_custom_function_register(&speech_function);
948 res |= ast_custom_function_register(&speech_score_function);
949 res |= ast_custom_function_register(&speech_text_function);
950 res |= ast_custom_function_register(&speech_grammar_function);
951 res |= ast_custom_function_register(&speech_engine_function);
952 res |= ast_custom_function_register(&speech_results_type_function);
/* Module declaration: passes the GPL key and the description "Dialplan Speech Applications" to AST_MODULE_INFO_STANDARD */
957 AST_MODULE_INFO_STANDARD(ASTERISK_GPL_KEY, "Dialplan Speech Applications");