support DTX and CNG in speex (bug #4608)
[asterisk/asterisk.git] / codecs / codec_speex.c
1 /*
2  * Asterisk -- A telephony toolkit for Linux.
3  *
4  * Translate between signed linear and Speex (Open Codec)
5  *
6  * Copyright (C) 2002, Digium
7  *
8  * Mark Spencer <markster@digium.com>
9  *
10  * This program is free software, distributed under the terms of
11  * the GNU General Public License
12  *
13  * This work was motivated by Jeremy McNamara 
14  * hacked to be configurable by anthm and bkw 9/28/2004
15  */
16
17 #include <fcntl.h>
18 #include <stdlib.h>
19 #include <unistd.h>
20 #include <netinet/in.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <speex.h>
24
/* Codec tunables.  These are the built-in defaults; parse_config() replaces
   them with whatever the [speex] section of codecs.conf provides. */
static int quality = 8;          /* fixed-rate encoder quality, 0-10 */
static int complexity = 2;       /* encoder complexity, 0-10 */
static int enhancement = 0;      /* decoder perceptual enhancement: 0 off, 1 on */
static int vad = 0;              /* voice activity detection: 0 off, 1 on */
static int vbr = 0;              /* variable bit rate: 0 off, 1 on */
static float vbr_quality = 0;    /* VBR quality, 0-10 */
static int abr = 0;              /* average bit rate target; 0 leaves ABR disabled */
static int dtx = 0;              /* discontinuous transmission: 0 off, 1 on */
33
/* Two-bit type codes and the mask that selects them.
   NOTE(review): nothing in the visible part of this file uses these. */
#define TYPE_HIGH        0x0
#define TYPE_LOW         0x1
#define TYPE_SILENCE     0x2
#define TYPE_MASK        0x3
38
39 #include "asterisk.h"
40
41 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
42
43 #include "asterisk/lock.h"
44 #include "asterisk/translate.h"
45 #include "asterisk/module.h"
46 #include "asterisk/config.h"
47 #include "asterisk/options.h"
48 #include "asterisk/logger.h"
49 #include "asterisk/channel.h"
50
51 /* Sample frame data */
52 #include "slin_speex_ex.h"
53 #include "speex_slin_ex.h"
54
/* Taken by parse_config() while it updates the tunables and by
   unload_module() while it inspects localusecnt. */
AST_MUTEX_DEFINE_STATIC(localuser_lock);

/* Count of live translator instances (raised in *_new, lowered in *_destroy). */
static int localusecnt = 0;

/* Human-readable module description handed out by description(). */
static char *tdesc = "Speex/PCM16 (signed linear) Codec Translator";
59
60 struct ast_translator_pvt {
61         void *speex;
62         struct ast_frame f;
63         SpeexBits bits;
64         int framesize;
65         /* Space to build offset */
66         char offset[AST_FRIENDLY_OFFSET];
67         /* Buffer for our outgoing frame */
68         short outbuf[8000];
69         /* Enough to store a full second */
70         short buf[8000];
71         int tail;
72        int silent_state;
73 };
74
75 #define speex_coder_pvt ast_translator_pvt
76
77 static struct ast_translator_pvt *lintospeex_new(void)
78 {
79         struct speex_coder_pvt *tmp;
80         tmp = malloc(sizeof(struct speex_coder_pvt));
81         if (tmp) {
82                 if (!(tmp->speex = speex_encoder_init(&speex_nb_mode))) {
83                         free(tmp);
84                         tmp = NULL;
85                 } else {
86                         speex_bits_init(&tmp->bits);
87                         speex_bits_reset(&tmp->bits);
88                         speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
89                         speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
90
91                         if(!abr && !vbr) {
92                                 speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
93                                 if (vad)
94                                         speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
95                         }
96                         if (vbr) {
97                                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
98                                 speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
99                         }
100                         if (abr) {
101                                 speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
102                         }
103                        if (dtx)
104                                speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
105                         tmp->tail = 0;
106                        tmp->silent_state = 0;
107                 }
108                 localusecnt++;
109         }
110         return tmp;
111 }
112
113 static struct ast_translator_pvt *speextolin_new(void)
114 {
115         struct speex_coder_pvt *tmp;
116         tmp = malloc(sizeof(struct speex_coder_pvt));
117         if (tmp) {
118                 if (!(tmp->speex = speex_decoder_init(&speex_nb_mode))) {
119                         free(tmp);
120                         tmp = NULL;
121                 } else {
122                         speex_bits_init(&tmp->bits);
123                         speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
124                         if (enhancement)
125                                 speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
126                         tmp->tail = 0;
127                 }
128                 localusecnt++;
129         }
130         return tmp;
131 }
132
133 static struct ast_frame *lintospeex_sample(void)
134 {
135         static struct ast_frame f;
136         f.frametype = AST_FRAME_VOICE;
137         f.subclass = AST_FORMAT_SLINEAR;
138         f.datalen = sizeof(slin_speex_ex);
139         /* Assume 8000 Hz */
140         f.samples = sizeof(slin_speex_ex)/2;
141         f.mallocd = 0;
142         f.offset = 0;
143         f.src = __PRETTY_FUNCTION__;
144         f.data = slin_speex_ex;
145         return &f;
146 }
147
148 static struct ast_frame *speextolin_sample(void)
149 {
150         static struct ast_frame f;
151         f.frametype = AST_FRAME_VOICE;
152         f.subclass = AST_FORMAT_SPEEX;
153         f.datalen = sizeof(speex_slin_ex);
154         /* All frames are 20 ms long */
155         f.samples = 160;
156         f.mallocd = 0;
157         f.offset = 0;
158         f.src = __PRETTY_FUNCTION__;
159         f.data = speex_slin_ex;
160         return &f;
161 }
162
163 static struct ast_frame *speextolin_frameout(struct ast_translator_pvt *tmp)
164 {
165         if (!tmp->tail)
166                 return NULL;
167         /* Signed linear is no particular frame size, so just send whatever
168            we have in the buffer in one lump sum */
169         tmp->f.frametype = AST_FRAME_VOICE;
170         tmp->f.subclass = AST_FORMAT_SLINEAR;
171         tmp->f.datalen = tmp->tail * 2;
172         /* Assume 8000 Hz */
173         tmp->f.samples = tmp->tail;
174         tmp->f.mallocd = 0;
175         tmp->f.offset = AST_FRIENDLY_OFFSET;
176         tmp->f.src = __PRETTY_FUNCTION__;
177         tmp->f.data = tmp->buf;
178         /* Reset tail pointer */
179         tmp->tail = 0;
180         return &tmp->f; 
181 }
182
183 static int speextolin_framein(struct ast_translator_pvt *tmp, struct ast_frame *f)
184 {
185         /* Assuming there's space left, decode into the current buffer at
186            the tail location.  Read in as many frames as there are */
187         int x;
188         int res;
189         float fout[1024];
190
191         if(f->datalen == 0) {  /* Native PLC interpolation */
192                 if(tmp->tail + tmp->framesize > sizeof(tmp->buf) / 2) {
193                         ast_log(LOG_WARNING, "Out of buffer space\n");
194                         return -1;
195                 }
196                 speex_decode(tmp->speex, NULL, fout);
197                 for (x=0;x<tmp->framesize;x++) {
198                         tmp->buf[tmp->tail + x] = fout[x];
199                 }
200                 tmp->tail += tmp->framesize;
201                 return 0;
202         }
203
204
205         /* Read in bits */
206         speex_bits_read_from(&tmp->bits, f->data, f->datalen);
207         for(;;) {
208                 res = speex_decode(tmp->speex, &tmp->bits, fout);
209                 if (res < 0)
210                         break;
211                 if (tmp->tail + tmp->framesize < sizeof(tmp->buf) / 2) {
212                         for (x=0;x<tmp->framesize;x++) {
213                                 tmp->buf[tmp->tail + x] = fout[x];
214                         }
215                         tmp->tail += tmp->framesize;
216                 } else {
217                         ast_log(LOG_WARNING, "Out of buffer space\n");
218                         return -1;
219                 }
220                 
221         }
222         return 0;
223 }
224
225 static int lintospeex_framein(struct ast_translator_pvt *tmp, struct ast_frame *f)
226 {
227         /* Just add the frames to our stream */
228         /* XXX We should look at how old the rest of our stream is, and if it
229            is too old, then we should overwrite it entirely, otherwise we can
230            get artifacts of earlier talk that do not belong */
231         if (tmp->tail + f->datalen/2 < sizeof(tmp->buf) / 2) {
232                 memcpy((tmp->buf + tmp->tail), f->data, f->datalen);
233                 tmp->tail += f->datalen/2;
234         } else {
235                 ast_log(LOG_WARNING, "Out of buffer space\n");
236                 return -1;
237         }
238         return 0;
239 }
240
241 static struct ast_frame *lintospeex_frameout(struct ast_translator_pvt *tmp)
242 {
243         float fbuf[1024];
244         int len;
245         int y=0,x;
246        int is_speech=1;
247         /* We can't work on anything less than a frame in size */
248         if (tmp->tail < tmp->framesize)
249                 return NULL;
250         tmp->f.frametype = AST_FRAME_VOICE;
251         tmp->f.subclass = AST_FORMAT_SPEEX;
252         tmp->f.mallocd = 0;
253         tmp->f.offset = AST_FRIENDLY_OFFSET;
254         tmp->f.src = __PRETTY_FUNCTION__;
255         tmp->f.data = tmp->outbuf;
256         speex_bits_reset(&tmp->bits);
257         while(tmp->tail >= tmp->framesize) {
258                 /* Convert to floating point */
259                 for (x=0;x<tmp->framesize;x++)
260                         fbuf[x] = tmp->buf[x];
261                 /* Encode a frame of data */
262                is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
263                 /* Assume 8000 Hz -- 20 ms */
264                 tmp->tail -= tmp->framesize;
265                 /* Move the data at the end of the buffer to the front */
266                 if (tmp->tail)
267                         memmove(tmp->buf, tmp->buf + tmp->framesize, tmp->tail * 2);
268                 y++;
269         }
270
271        /* Use AST_FRAME_CNG to signify the start of any silence period */
272        if(!is_speech) {
273                if(tmp->silent_state) {
274                        return NULL;
275                } else {
276                        tmp->silent_state = 1;
277                        speex_bits_reset(&tmp->bits);
278                        tmp->f.frametype = AST_FRAME_CNG;
279                }
280        } else {
281                tmp->silent_state = 0;
282        }
283
284         /* Terminate bit stream */
285         speex_bits_pack(&tmp->bits, 15, 5);
286         len = speex_bits_write(&tmp->bits, (char *)tmp->outbuf, sizeof(tmp->outbuf));
287         tmp->f.datalen = len;
288         tmp->f.samples = y * 160;
289 #if 0
290         {
291                 static int fd = -1;
292                 if (fd  < 0) {
293                         fd = open("speex.raw", O_WRONLY|O_TRUNC|O_CREAT);
294                         if (fd > -1) {
295                                 write(fd, tmp->f.data, tmp->f.datalen);
296                                 close(fd);
297                         }
298                 }
299         }
300 #endif  
301         return &tmp->f; 
302 }
303
304 static void speextolin_destroy(struct ast_translator_pvt *pvt)
305 {
306         speex_decoder_destroy(pvt->speex);
307         speex_bits_destroy(&pvt->bits);
308         free(pvt);
309         localusecnt--;
310 }
311
312 static void lintospeex_destroy(struct ast_translator_pvt *pvt)
313 {
314         speex_encoder_destroy(pvt->speex);
315         speex_bits_destroy(&pvt->bits);
316         free(pvt);
317         localusecnt--;
318 }
319
320 static struct ast_translator speextolin =
321         { "speextolin", 
322            AST_FORMAT_SPEEX, AST_FORMAT_SLINEAR,
323            speextolin_new,
324            speextolin_framein,
325            speextolin_frameout,
326            speextolin_destroy,
327            speextolin_sample
328            };
329
330 static struct ast_translator lintospeex =
331         { "lintospeex", 
332            AST_FORMAT_SLINEAR, AST_FORMAT_SPEEX,
333            lintospeex_new,
334            lintospeex_framein,
335            lintospeex_frameout,
336            lintospeex_destroy,
337            lintospeex_sample
338         };
339
340
341 static void parse_config(void) 
342 {
343         struct ast_config *cfg;
344         struct ast_variable *var;
345         int res;
346         float res_f;
347
348         if ((cfg = ast_config_load("codecs.conf"))) {
349                 if ((var = ast_variable_browse(cfg, "speex"))) {
350                         while (var) {
351                                 if (!strcasecmp(var->name, "quality")) {
352                                         res = abs(atoi(var->value));
353                                         if (res > -1 && res < 11) {
354                                                 if (option_verbose > 2)
355                                                         ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
356                                                 ast_mutex_lock(&localuser_lock);
357                                                 quality = res;
358                                                 ast_mutex_unlock(&localuser_lock);
359                                         } else 
360                                                 ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
361                                 } else if (!strcasecmp(var->name, "complexity")) {
362                                         res = abs(atoi(var->value));
363                                         if (option_verbose > 2)
364                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
365                                         if (res > -1 && res < 11) {
366                                                 ast_mutex_lock(&localuser_lock);
367                                                 complexity = res;
368                                                 ast_mutex_unlock(&localuser_lock);
369                                         } else 
370                                                 ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
371                                 } else if (!strcasecmp(var->name, "vbr_quality")) {
372                                         res_f = abs(atof(var->value));
373                                         if (option_verbose > 2)
374                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
375                                         if (res_f >= 0 && res_f <= 10) {
376                                                 ast_mutex_lock(&localuser_lock);
377                                                 vbr_quality = res_f;
378                                                 ast_mutex_unlock(&localuser_lock);
379                                         } else 
380                                                 ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
381                                 } else if (!strcasecmp(var->name, "abr_quality")) {
382                                         ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
383                                 } else if (!strcasecmp(var->name, "enhancement")) {
384                                         ast_mutex_lock(&localuser_lock);
385                                         enhancement = ast_true(var->value) ? 1 : 0;
386                                         if (option_verbose > 2)
387                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
388                                         ast_mutex_unlock(&localuser_lock);
389                                 } else if (!strcasecmp(var->name, "vbr")) {
390                                         ast_mutex_lock(&localuser_lock);
391                                         vbr = ast_true(var->value) ? 1 : 0;
392                                         if (option_verbose > 2)
393                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
394                                         ast_mutex_unlock(&localuser_lock);
395                                 } else if (!strcasecmp(var->name, "abr")) {
396                                         res = abs(atoi(var->value));
397                                         if (option_verbose > 2) {
398                                               if(res > 0)
399                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
400                                               else
401                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
402                                         }
403                                         if (res >= 0) {
404                                                 ast_mutex_lock(&localuser_lock);
405                                                 abr = res;
406                                                 ast_mutex_unlock(&localuser_lock);
407                                         } else 
408                                                 ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
409                                 } else if (!strcasecmp(var->name, "vad")) {
410                                         ast_mutex_lock(&localuser_lock);
411                                         vad = ast_true(var->value) ? 1 : 0;
412                                         if (option_verbose > 2)
413                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
414                                         ast_mutex_unlock(&localuser_lock);
415                                 } else if (!strcasecmp(var->name, "dtx")) {
416                                         ast_mutex_lock(&localuser_lock);
417                                         dtx = ast_true(var->value) ? 1 : 0;
418                                         if (option_verbose > 2)
419                                                 ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
420                                         ast_mutex_unlock(&localuser_lock);
421                                 }
422                                 var = var->next;
423                         }
424                 }
425                 ast_config_destroy(cfg);
426         }
427 }
428
/* Module reload hook: re-read the configuration.  Always returns 0. */
int reload(void)
{
        parse_config();

        return 0;
}
434
435 int unload_module(void)
436 {
437         int res;
438         ast_mutex_lock(&localuser_lock);
439         res = ast_unregister_translator(&lintospeex);
440         if (!res)
441                 res = ast_unregister_translator(&speextolin);
442         if (localusecnt)
443                 res = -1;
444         ast_mutex_unlock(&localuser_lock);
445         return res;
446 }
447
448 int load_module(void)
449 {
450         int res;
451         parse_config();
452         res=ast_register_translator(&speextolin);
453         if (!res) 
454                 res=ast_register_translator(&lintospeex);
455         else
456                 ast_unregister_translator(&speextolin);
457         return res;
458 }
459
460 char *description(void)
461 {
462         return tdesc;
463 }
464
/* Module hook: report the current use count, as filled in by the
   STANDARD_USECOUNT() macro. */
int usecount(void)
{
        int res;

        STANDARD_USECOUNT(res);

        return res;
}
471
472 char *key()
473 {
474         return ASTERISK_GPL_KEY;
475 }