264f56ba132911adc89c730042da2cb7c3585e85
[asterisk/asterisk.git] / apps / app_speech_utils.c
1 /*
2  * Asterisk -- An open source telephony toolkit.
3  *
4  * Copyright (C) 2006, Digium, Inc.
5  *
6  * Joshua Colp <jcolp@digium.com>
7  *
8  * See http://www.asterisk.org for more information about
9  * the Asterisk project. Please do not directly contact
10  * any of the maintainers of this project for assistance;
11  * the project provides a web site, mailing lists and IRC
12  * channels for your use.
13  *
14  * This program is free software, distributed under the terms of
15  * the GNU General Public License Version 2. See the LICENSE file
16  * at the top of the source tree.
17  */
18
19 /*! \file
20  *
21  * \brief Speech Recognition Utility Applications
22  *
23  * \author Joshua Colp <jcolp@digium.com>
24  *
25  * \ingroup applications
26  */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32
33 #include "asterisk.h"
34
35 ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
36
37 #include "asterisk/file.h"
38 #include "asterisk/logger.h"
39 #include "asterisk/channel.h"
40 #include "asterisk/pbx.h"
41 #include "asterisk/module.h"
42 #include "asterisk/lock.h"
43 #include "asterisk/app.h"
44 #include "asterisk/speech.h"
45
/* Human-readable module description; handed back to callers by description(). */
static char *tdesc = "Dialplan Speech Applications";

/* Expands the LOCAL_USER_DECL macro (defined in a header outside this file).
 * NOTE(review): presumably declares the bookkeeping used by the
 * LOCAL_USER_ADD / LOCAL_USER_REMOVE / STANDARD_USECOUNT macros below -- confirm. */
LOCAL_USER_DECL;
49
/*
 * Help text for each dialplan application. Every string is passed as the
 * description argument of the matching ast_register_application() call in
 * load_module(). Adjacent literals are concatenated by the compiler, so the
 * line breaks in the source are purely for readability.
 */

/* SpeechCreate */
static char *speechcreate_descrip =
        "SpeechCreate(engine name)\n"
        "This application creates information to be used by all the other applications. "
        "It must be called before doing any speech recognition activities such as activating a grammar.\n"
        "It takes the engine name to use as the argument, if not specified the default engine will be used.\n";

/* SpeechActivateGrammar */
static char *speechactivategrammar_descrip =
        "SpeechActivateGrammar(Grammar Name)\n"
        "This activates the specified grammar to be recognized by the engine. "
        "A grammar tells the speech recognition engine what to recognize, \n"
        "and how to portray it back to you in the dialplan. "
        "The grammar name is the only argument to this application.\n";

/* SpeechStart */
static char *speechstart_descrip =
        "SpeechStart()\n"
        "Tell the speech recognition engine that it should start trying to get results from audio being fed to it. "
        "This has no arguments.\n";

/* SpeechBackground */
static char *speechbackground_descrip =
        "SpeechBackground(Sound File|Timeout)\n"
        "This application plays a sound file and waits for the person to speak. "
        "Once they start speaking playback of the file stops, and silence is heard.\n"
        "Once they stop talking the processing sound is played to indicate the speech recognition engine is working.\n"
        "Once results are available the application returns and results (score and text) are available as dialplan variables.\n"
        "The first text and score are ${TEXT0} AND ${SCORE0} while the second are ${TEXT1} and ${SCORE1}.\n"
        "This may change in the future, however, to use a dialplan function instead of dialplan variables. "
        "Note it is possible to have more then one result.\n"
        "The first argument is the sound file and the second is the timeout. "
        "Note the timeout will only start once the sound file has stopped playing.\n";

/* SpeechDeactivateGrammar */
static char *speechdeactivategrammar_descrip =
        "SpeechDeactivateGrammar(Grammar Name)\n"
        "This deactivates the specified grammar so that it is no longer recognized. "
        "The only argument is the grammar name to deactivate.\n";

/* SpeechProcessingSound */
static char *speechprocessingsound_descrip =
        "SpeechProcessingSound(Sound File)\n"
        "This changes the processing sound that SpeechBackground plays back when the speech recognition engine is processing and working to get results.\n"
        "It takes the sound file as the only argument.\n";

/* SpeechDestroy */
static char *speechdestroy_descrip =
        "SpeechDestroy()\n"
        "This destroys the information used by all the other speech recognition applications.\n"
        "If you call this application but end up wanting to recognize more speech, "
        "you must call SpeechCreate\n"
        "again before calling any other application. "
        "It takes no arguments.\n";
88
89 /*! \brief Helper function used by datastores to destroy the speech structure upon hangup */
90 static void destroy_callback(void *data)
91 {
92         struct ast_speech *speech = (struct ast_speech*)data;
93
94         if (speech == NULL) {
95                 return;
96         }
97
98         /* Deallocate now */
99         ast_speech_destroy(speech);
100
101         return;
102 }
103
104 /*! \brief Static structure for datastore information */
105 static const struct ast_datastore_info speech_datastore = {
106         .type = "speech",
107         .destroy = destroy_callback
108 };
109
110 /*! \brief Helper function used to find the speech structure attached to a channel */
111 static struct ast_speech *find_speech(struct ast_channel *chan)
112 {
113         struct ast_speech *speech = NULL;
114         struct ast_datastore *datastore = NULL;
115         
116         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
117         if (datastore == NULL) {
118                 return NULL;
119         }
120         speech = datastore->data;
121
122         return speech;
123 }
124
125 /*! \brief SpeechCreate() Dialplan Application */
126 static int speech_create(struct ast_channel *chan, void *data)
127 {
128         struct localuser *u = NULL;
129         struct ast_speech *speech = NULL;
130         struct ast_datastore *datastore = NULL;
131
132         LOCAL_USER_ADD(u);
133
134         /* Request a speech object */
135         speech = ast_speech_new(data, AST_FORMAT_SLINEAR);
136         if (speech == NULL) {
137                 /* Not available */
138                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
139                 LOCAL_USER_REMOVE(u);
140                 return 0;
141         }
142
143         datastore = ast_channel_datastore_alloc(&speech_datastore, NULL);
144         if (datastore == NULL) {
145                 ast_speech_destroy(speech);
146                 pbx_builtin_setvar_helper(chan, "ERROR", "1");
147                 LOCAL_USER_REMOVE(u);
148                 return 0;
149         }
150         datastore->data = speech;
151         ast_channel_datastore_add(chan, datastore);
152
153         LOCAL_USER_REMOVE(u);
154
155         return 0;
156 }
157
158 /*! \brief SpeechDeactivateGrammar(Grammar Name) Dialplan Application */
159 static int speech_deactivate(struct ast_channel *chan, void *data)
160 {
161         int res = 0;
162         struct localuser *u = NULL;
163         struct ast_speech *speech = find_speech(chan);
164
165         LOCAL_USER_ADD(u);
166
167         if (speech == NULL) {
168                 LOCAL_USER_REMOVE(u);
169                 return -1;
170         }
171
172         /* Deactivate the grammar on the speech object */
173         res = ast_speech_grammar_deactivate(speech, data);
174
175         LOCAL_USER_REMOVE(u);
176
177         return res;
178 }
179
180 /*! \brief SpeechActivateGrammar(Grammar Name) Dialplan Application */
181 static int speech_activate(struct ast_channel *chan, void *data)
182 {
183         int res = 0;
184         struct localuser *u = NULL;
185         struct ast_speech *speech = find_speech(chan);
186
187         LOCAL_USER_ADD(u);
188
189         if (speech == NULL) {
190                 LOCAL_USER_REMOVE(u);
191                 return -1;
192         }
193
194         /* Activate the grammar on the speech object */
195         res = ast_speech_grammar_activate(speech, data);
196
197         LOCAL_USER_REMOVE(u);
198
199         return res;
200 }
201
202 /*! \brief SpeechStart() Dialplan Application */
203 static int speech_start(struct ast_channel *chan, void *data)
204 {
205         int res = 0;
206         struct localuser *u = NULL;
207         struct ast_speech *speech = find_speech(chan);
208
209         LOCAL_USER_ADD(u);
210
211         if (speech == NULL) {
212                 LOCAL_USER_REMOVE(u);
213                 return -1;
214         }
215
216         ast_speech_start(speech);
217
218         LOCAL_USER_REMOVE(u);
219
220         return res;
221 }
222
223 /*! \brief SpeechProcessingSound(Sound File) Dialplan Application */
224 static int speech_processing_sound(struct ast_channel *chan, void *data)
225 {
226         int res = 0;
227         struct localuser *u = NULL;
228         struct ast_speech *speech = find_speech(chan);
229
230         LOCAL_USER_ADD(u);
231
232         if (speech == NULL) {
233                 LOCAL_USER_REMOVE(u);
234                 return -1;
235         }
236
237         if (speech->processing_sound != NULL) {
238                 free(speech->processing_sound);
239                 speech->processing_sound = NULL;
240         }
241
242         speech->processing_sound = strdup(data);
243
244         LOCAL_USER_REMOVE(u);
245
246         return res;
247 }
248
249 /*! \brief Helper function used by speech_background to playback a soundfile */
250 static int speech_streamfile(struct ast_channel *chan, const char *filename, const char *preflang)
251 {
252         struct ast_filestream *fs;
253         struct ast_filestream *vfs=NULL;
254
255         fs = ast_openstream(chan, filename, preflang);
256         if (fs)
257                 vfs = ast_openvstream(chan, filename, preflang);
258         if (fs){
259                 if (ast_applystream(chan, fs))
260                         return -1;
261                 if (vfs && ast_applystream(chan, vfs))
262                         return -1;
263                 if (ast_playstream(fs))
264                         return -1;
265                 if (vfs && ast_playstream(vfs))
266                         return -1;
267                 return 0;
268         }
269         return -1;
270 }
271
272 /*! \brief SpeechBackground(Sound File|Timeout) Dialplan Application */
273 static int speech_background(struct ast_channel *chan, void *data)
274 {
275         unsigned int timeout = 0;
276         int res = 0, done = 0, concepts = 0, argc = 0, started = 0;
277         struct localuser *u = NULL;
278         struct ast_speech *speech = find_speech(chan);
279         struct ast_speech_result *results = NULL, *result = NULL;
280         struct ast_frame *f = NULL;
281         int oldreadformat = AST_FORMAT_SLINEAR;
282         char tmp[256] = "", tmp2[256] = "";
283         char dtmf[AST_MAX_EXTENSION] = "";
284         time_t start, current;
285         struct ast_datastore *datastore = NULL;
286         char *argv[2], *args = NULL, *filename = NULL;
287
288         if (!(args = ast_strdupa(data)))
289                 return -1;
290
291         LOCAL_USER_ADD(u);
292
293         if (speech == NULL) {
294                 LOCAL_USER_REMOVE(u);
295                 return -1;
296         }
297
298         /* Record old read format */
299         oldreadformat = chan->readformat;
300
301         /* Change read format to be signed linear */
302         if (ast_set_read_format(chan, AST_FORMAT_SLINEAR)) {
303                 LOCAL_USER_REMOVE(u);
304                 return -1;
305         }
306
307         /* Parse out options */
308         argc = ast_app_separate_args(args, '|', argv, sizeof(argv) / sizeof(argv[0]));
309         if (argc > 0) {
310                 /* Yay sound file */
311                 filename = argv[0];
312                 if (argv[1] != NULL)
313                         timeout = atoi(argv[1]);
314         }
315
316         /* Start streaming the file if possible and specified */
317         if (filename != NULL && ast_streamfile(chan, filename, chan->language)) {
318                 /* An error occured while streaming */
319                 ast_set_read_format(chan, oldreadformat);
320                 LOCAL_USER_REMOVE(u);
321                 return -1;
322         }
323
324         /* Before we go into waiting for stuff... make sure the structure is ready, if not - start it again */
325         if (speech->state == AST_SPEECH_STATE_NOT_READY || speech->state == AST_SPEECH_STATE_DONE) {
326                 speech->state = AST_SPEECH_STATE_NOT_READY;
327                 ast_speech_start(speech);
328         }
329
330         /* Okay it's streaming so go into a loop grabbing frames! */
331         while (done == 0) {
332                 /* Run scheduled stuff */
333                 ast_sched_runq(chan->sched);
334
335                 /* Yay scheduling */
336                 res = ast_sched_wait(chan->sched);
337                 if (res < 0) {
338                         res = 1000;
339                 }
340
341                 /* If there is a frame waiting, get it - if not - oh well */
342                 if (ast_waitfor(chan, res) > 0) {
343                         f = ast_read(chan);
344                         if (f == NULL) {
345                                 /* The channel has hung up most likely */
346                                 done = 3;
347                                 break;
348                         }
349                 }
350
351                 /* Do checks on speech structure to see if it's changed */
352                 ast_mutex_lock(&speech->lock);
353                 if (ast_test_flag(speech, AST_SPEECH_QUIET) && chan->stream != NULL) {
354                         ast_stopstream(chan);
355                         ast_clear_flag(speech, AST_SPEECH_QUIET);
356                 }
357                 /* Check state so we can see what to do */
358                 switch (speech->state) {
359                 case AST_SPEECH_STATE_READY:
360                         /* If audio playback has stopped do a check for timeout purposes */
361                         if (chan->streamid == -1 && chan->timingfunc == NULL)
362                                 ast_stopstream(chan);
363                         if (chan->stream == NULL && timeout > 0) {
364                                 /* If start time is not yet done... do it */
365                                 if (started == 0) {
366                                         time(&start);
367                                         started = 1;
368                                 } else {
369                                         time(&current);
370                                         if ((current-start) >= timeout) {
371                                                 pbx_builtin_setvar_helper(chan, "SILENCE", "1");
372                                                 done = 1;
373                                                 break;
374                                         }
375                                 }
376                         }
377                         /* Deal with audio frames if present */
378                         if (f != NULL && f->frametype == AST_FRAME_VOICE) {
379                                 ast_speech_write(speech, f->data, f->datalen);
380                         }
381                         break;
382                 case AST_SPEECH_STATE_WAIT:
383                         /* Cue up waiting sound if not already playing */
384                         if (chan->stream == NULL) {
385                                 if (speech->processing_sound != NULL) {
386                                         if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound,"none")) {
387                                                 speech_streamfile(chan, speech->processing_sound, chan->language);
388                                         }
389                                 }
390                         } else if (chan->streamid == -1 && chan->timingfunc == NULL) {
391                                 ast_stopstream(chan);
392                                 if (speech->processing_sound != NULL) {
393                                         if (strlen(speech->processing_sound) > 0 && strcasecmp(speech->processing_sound,"none")) {
394                                                 speech_streamfile(chan, speech->processing_sound, chan->language);
395                                         }
396                                 }
397                         }
398                         break;
399                 case AST_SPEECH_STATE_DONE:
400                         /* Assume there will be no results by default */
401                         pbx_builtin_setvar_helper(chan, "RESULTS", "0");
402                         pbx_builtin_setvar_helper(chan, "SILENCE", "0");
403                         /* Decoding is done and over... see if we have results */
404                         results = ast_speech_results_get(speech);
405                         if (results != NULL) {
406                                 for (result=results; result!=NULL; result=result->next) {
407                                         /* Text */
408                                         snprintf(tmp, sizeof(tmp), "TEXT%d", concepts);
409                                         pbx_builtin_setvar_helper(chan, tmp, result->text);
410                                         /* Now... score! */
411                                         snprintf(tmp, sizeof(tmp), "SCORE%d", concepts);
412                                         snprintf(tmp2, sizeof(tmp2), "%d", result->score);
413                                         pbx_builtin_setvar_helper(chan, tmp, tmp2);
414                                         concepts++;
415                                 }
416                                 /* Expose number of results to dialplan */
417                                 snprintf(tmp, sizeof(tmp), "%d", concepts);
418                                 pbx_builtin_setvar_helper(chan, "RESULTS", tmp);
419                                 /* Destroy the results since they are now in the dialplan */
420                                 ast_speech_results_free(results);
421                         }
422                         /* Now that we are done... let's switch back to not ready state */
423                         speech->state = AST_SPEECH_STATE_NOT_READY;
424                         /* Break out of our background too */
425                         done = 1;
426                         /* Stop audio playback */
427                         if (chan->stream != NULL) {
428                                 ast_stopstream(chan);
429                         }
430                         break;
431                 default:
432                         break;
433                 }
434                 ast_mutex_unlock(&speech->lock);
435
436                 /* Deal with other frame types */
437                 if (f != NULL) {
438                         /* Free the frame we received */
439                         switch (f->frametype) {
440                         case AST_FRAME_DTMF:
441                                 if (f->subclass == '#') {
442                                         /* Input is done, throw it into the dialplan */
443                                         pbx_builtin_setvar_helper(chan, "RESULTS", "1");
444                                         pbx_builtin_setvar_helper(chan, "SCORE0", "1000");
445                                         pbx_builtin_setvar_helper(chan, "TEXT0", dtmf);
446                                         done = 1;
447                                 } else {
448                                         if (chan->stream != NULL) {
449                                                 ast_stopstream(chan);
450                                         }
451                                         /* Start timeout if not already started */
452                                         if (strlen(dtmf) == 0) {
453                                                 time(&start);
454                                         }
455                                         /* Append to the current information */
456                                         snprintf(tmp, sizeof(tmp), "%c", f->subclass);
457                                         strncat(dtmf, tmp, sizeof(dtmf));
458                                 }
459                                 break;
460                         case AST_FRAME_CONTROL:
461                                 ast_log(LOG_NOTICE, "Have a control frame of subclass %d\n", f->subclass);
462                                 switch (f->subclass) {
463                                 case AST_CONTROL_HANGUP:
464                                         /* Since they hung up we should destroy the speech structure */
465                                         done = 3;
466                                 default:
467                                         break;
468                                 }
469                         default:
470                                 break;
471                         }
472                         ast_frfree(f);
473                         f = NULL;
474                 }
475         }
476
477         /* See if it was because they hung up */
478         if (done == 3) {
479                 /* Destroy speech structure */
480                 ast_speech_destroy(speech);
481
482                 datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
483                 if (datastore != NULL) {
484                         ast_channel_datastore_remove(chan, datastore);
485                 }
486         } else {
487                 /* Channel is okay so restore read format */
488                 ast_set_read_format(chan, oldreadformat);
489         }
490
491         LOCAL_USER_REMOVE(u);
492
493         return 0;
494 }
495
496 /*! \brief SpeechDestroy() Dialplan Application */
497 static int speech_destroy(struct ast_channel *chan, void *data)
498 {
499         int res = 0;
500         struct localuser *u = NULL;
501         struct ast_speech *speech = find_speech(chan);
502         struct ast_datastore *datastore = NULL;
503
504         LOCAL_USER_ADD(u);
505
506         if (speech == NULL) {
507                 LOCAL_USER_REMOVE(u);
508                 return -1;
509         }
510
511         /* Destroy speech structure */
512         ast_speech_destroy(speech);
513
514         datastore = ast_channel_datastore_find(chan, &speech_datastore, NULL);
515         if (datastore != NULL) {
516                 ast_channel_datastore_remove(chan, datastore);
517         }
518
519         LOCAL_USER_REMOVE(u);
520
521         return res;
522 }
523
524 int unload_module(void)
525 {
526         int res = 0;
527
528         res = ast_unregister_application("SpeechCreate");
529         res |= ast_unregister_application("SpeechActivateGrammar");
530         res |= ast_unregister_application("SpeechDeactivateGrammar");
531         res |= ast_unregister_application("SpeechStart");
532         res |= ast_unregister_application("SpeechBackground");
533         res |= ast_unregister_application("SpeechDestroy");
534
535         STANDARD_HANGUP_LOCALUSERS;
536
537         return res;     
538 }
539
540 int load_module(void)
541 {
542         int res = 0;
543
544         res = ast_register_application("SpeechCreate", speech_create, "Create a Speech Structure", speechcreate_descrip);
545         res |= ast_register_application("SpeechActivateGrammar", speech_activate, "Activate a Grammar", speechactivategrammar_descrip);
546         res |= ast_register_application("SpeechDeactivateGrammar", speech_deactivate, "Deactivate a Grammar", speechdeactivategrammar_descrip);
547         res |= ast_register_application("SpeechStart", speech_start, "Start recognizing", speechstart_descrip);
548         res |= ast_register_application("SpeechBackground", speech_background, "Play a sound file and wait for speech to be recognized", speechbackground_descrip);
549         res |= ast_register_application("SpeechDestroy", speech_destroy, "End speech recognition", speechdestroy_descrip);
550         res |= ast_register_application("SpeechProcessingSound", speech_processing_sound, "Change background processing sound", speechprocessingsound_descrip);
551         
552         return res;
553 }
554
/*! \brief Module reload hook; performs no work and always returns 0 */
int reload(void)
{
        /* Nothing is re-read or re-registered here */
        return 0;
}
559
560 const char *description(void)
561 {
562         return tdesc;
563 }
564
/*! \brief Return the value that STANDARD_USECOUNT stores for this module */
int usecount(void)
{
        int count;

        /* The macro assigns into the variable it is given */
        STANDARD_USECOUNT(count);

        return count;
}
573
574 const char *key()
575 {
576         return ASTERISK_GPL_KEY;
577 }