Add support for ICE/STUN/TURN in res_rtp_asterisk and chan_sip.
[asterisk/asterisk.git] / res / pjproject / pjmedia / src / pjmedia / echo_suppress.c
1 /* $Id$ */
2 /* 
3  * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4  * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
19  */
20 #include <pjmedia/types.h>
21 #include <pjmedia/alaw_ulaw.h>
22 #include <pjmedia/errno.h>
23 #include <pjmedia/frame.h>
24 #include <pjmedia/silencedet.h>
25 #include <pj/array.h>
26 #include <pj/assert.h>
27 #include <pj/lock.h>
28 #include <pj/log.h>
29 #include <pj/os.h>
30 #include <pj/pool.h>
31
32 #include "echo_internal.h"
33
34 #define THIS_FILE                           "echo_suppress.c"
35
36 /* Maximum float constant */
37 #define MAX_FLOAT               (float)1.701411e38
38
39 /* The effective learn duration (in seconds) before we declare that learning
40  * is complete. The actual learning duration itself may be longer depending
41  * on the conversation pattern (e.g. we can't detect echo if speaker is only
42  * playing silence).
43  */
44 #define MAX_CALC_DURATION_SEC   3
45
46 /* The internal audio segment length, in milliseconds. 10ms should be good
47  * and there is no need to change it.
48  */
49 #define SEGMENT_PTIME           10
50
51 /* The length of the template signal in milliseconds. The longer the template,
52  * the better correlation will be found, at the expense of more processing
53  * and longer learning time.
54  */
55 #define TEMPLATE_PTIME          200
56
57 /* How long to look back in the past to see if either mic or speaker is
58  * active.
59  */
60 #define SIGNAL_LOOKUP_MSEC      200
61
62 /* The minimum level value to be considered as talking, in uLaw complement
63  * (0-255).
64  */
65 #define MIN_SIGNAL_ULAW         35
66
67 /* The period (in seconds) on which the ES will analyze its effectiveness,
68  * and it may trigger soft-reset to force recalculation.
69  */
70 #define CHECK_PERIOD            30
71
72 /* Maximum signal level of average echo residue (in uLaw complement). When
73  * the residue value exceeds this value, we force the ES to re-learn.
74  */
75 #define MAX_RESIDUE             2.5
76
77
78 #if 0
79 #   define TRACE_(expr) PJ_LOG(5,expr)
80 #else
81 #   define TRACE_(expr)
82 #endif
83
84 PJ_INLINE(float) FABS(float val)
85 {
86     if (val < 0)
87         return -val;
88     else
89         return val;
90 }
91
92
93 #if defined(PJ_HAS_FLOATING_POINT) && PJ_HAS_FLOATING_POINT!=0
94     typedef float pj_ufloat_t;
95 #   define pj_ufloat_from_float(f)      (f)
96 #   define pj_ufloat_mul_u(val1, f)     ((val1) * (f))
97 #   define pj_ufloat_mul_i(val1, f)     ((val1) * (f))
98 #else
99     typedef pj_uint32_t pj_ufloat_t;
100
101     pj_ufloat_t pj_ufloat_from_float(float f)
102     {
103         return (pj_ufloat_t)(f * 65536);
104     }
105
106     unsigned pj_ufloat_mul_u(unsigned val1, pj_ufloat_t val2)
107     {
108         return (val1 * val2) >> 16;
109     }
110
111     int pj_ufloat_mul_i(int val1, pj_ufloat_t val2)
112     {
113         return (val1 * (pj_int32_t)val2) >> 16;
114     }
115 #endif
116
117
118 /* Conversation state */
/* Conversation state, as last decided by the echo suppressor. */
typedef enum talk_state
{
    ST_NULL        = 0, /* State after a hard reset, nothing decided yet    */
    ST_LOCAL_TALK  = 1, /* Microphone is active, speaker is not dominant    */
    ST_REM_SILENT  = 2, /* Playback level is below the talking threshold    */
    ST_DOUBLETALK  = 3, /* Both active, microphone louder than the speaker  */
    ST_REM_TALK    = 4  /* Speaker is active; mic signal treated as echo    */
} talk_state_t;

/* Human readable state names for tracing, indexed by talk_state_t. */
const char *state_names[] = 
{
    /* ST_NULL       */ "Null",
    /* ST_LOCAL_TALK */ "local talking",
    /* ST_REM_SILENT */ "remote silent",
    /* ST_DOUBLETALK */ "doubletalk",
    /* ST_REM_TALK   */ "remote talking"
};
136
137
138 /* Description:
139
140    The echo suppressor tries to find the position of echoed signal by looking
141    at the correlation between signal played to the speaker (played signal) 
142    and the signal captured from the microphone (recorded signal).
143
144    To do this, it first divides the frames (from mic and speaker) into 
145    segments, calculate the audio level of the segment, and save the level
146    information in the playback and record history (play_hist and rec_hist
147    respectively).
148
149    In the history, the newest element (depicted as "t0" in the diagram below)
150    is put in the last position of the array.
151
152    The record history size is as large as the template size (tmpl_cnt), since
153    we will use the record history as the template to find the best matching 
154    position in the playback history.
155
156    Here is the record history buffer:
157
158        <--templ_cnt-->
159        +-------------+
160        |   rec_hist  |
161        +-------------+
162     t-templ_cnt......t0
163
164    As you can see, the newest frame ("t0") is put as the last element in the
165    array.
166
167    The playback history size is larger than record history, since we need to
168    find the matching pattern in the past. The playback history size is
169    "templ_cnt + tail_cnt", where "tail_cnt" is the number of segments equal
170    to the maximum tail length. The maximum tail length is set when the ES
171    is created.
172
173    Here is the playback history buffer:
174
175        <-----tail_cnt-----> <--templ_cnt-->
176        +-------------------+--------------+
177        |             play_hist            |
178        +-------------------+--------------+
179    t-play_hist_cnt...t-templ_cnt.......t0
180
181
182
183    Learning:
184
185    During the processing, the ES calculates the following values:
186     - the correlation value, that is how similar the playback signal compared
187       to the mic signal. The lower the correlation value the better (i.e. more
188       similar) the signal is. The correlation value is done over the template
189       duration.
190     - the gain scaling factor, that is the ratio between mic signal and 
191       speaker signal. The ES calculates both the minimum and average ratios.
192
193    The ES calculates both the values above for every tail position in the
194    playback history. The values are saved in arrays below:
195
196      <-----tail_cnt----->
197      +-------------------+
198      |      corr_sum     |
199      +-------------------+
200      |     min_factor    |
201      +-------------------+
202      |     avg_factor    |
203      +-------------------+
204
205    At the end of processing, the ES iterates through the correlation array and
206    picks the tail index with the lowest corr_sum value. This is the position
207    where echo is most likely to be found.
208
209
210    Processing:
211
212    Once learning is done, the ES will change the level of the mic signal 
213    depending on the state of the conversation and according to the ratio that
214    has been found in the learning phase above.
215
216  */
217
218 /*
219  * The simple echo suppresor state
220  */
221 typedef struct echo_supp
222 {
223     unsigned     clock_rate;        /* Clock rate.                          */
224     pj_uint16_t  samples_per_frame; /* Frame length in samples              */
225     pj_uint16_t  samples_per_segment;/* Segment length in samples           */
226     pj_uint16_t  tail_ms;           /* Tail length in milliseconds          */
227     pj_uint16_t  tail_samples;      /* Tail length in samples.              */
228
229     pj_bool_t    learning;          /* Are we still learning yet?           */
230     talk_state_t talk_state;        /* Current talking state                */
231     int          tail_index;        /* Echo location, -1 if not found       */
232
233     unsigned     max_calc;          /* # of calc before learning complete.
234                                        (see MAX_CALC_DURATION_SEC)          */
235     unsigned     calc_cnt;          /* Number of calculations so far        */
236
237     unsigned     update_cnt;        /* # of updates                         */
238     unsigned     templ_cnt;         /* Template length, in # of segments    */
239     unsigned     tail_cnt;          /* Tail length, in # of segments        */
240     unsigned     play_hist_cnt;     /* # of segments in play_hist           */
241     pj_uint16_t *play_hist;         /* Array of playback levels             */
242     pj_uint16_t *rec_hist;          /* Array of rec levels                  */
243
244     float       *corr_sum;          /* Array of corr for each tail pos.     */
245     float       *tmp_corr;          /* Temporary corr array calculation     */
246     float        best_corr;         /* Best correlation so far.             */
247
248     unsigned     sum_rec_level;     /* Running sum of level in rec_hist     */
249     float        rec_corr;          /* Running corr in rec_hist.            */
250
251     unsigned     sum_play_level0;   /* Running sum of level for first pos   */
252     float        play_corr0;        /* Running corr for first pos .         */
253
254     float       *min_factor;        /* Array of minimum scaling factor      */
255     float       *avg_factor;        /* Array of average scaling factor      */
256     float       *tmp_factor;        /* Array to store provisional result    */
257
258     unsigned     running_cnt;       /* Running duration in # of frames      */
259     float        residue;           /* Accummulated echo residue.           */
260     float        last_factor;       /* Last factor applied to mic signal    */
261 } echo_supp;
262
263
264
265 /*
266  * Create. 
267  */
268 PJ_DEF(pj_status_t) echo_supp_create( pj_pool_t *pool,
269                                       unsigned clock_rate,
270                                       unsigned channel_count,
271                                       unsigned samples_per_frame,
272                                       unsigned tail_ms,
273                                       unsigned options,
274                                       void **p_state )
275 {
276     echo_supp *ec;
277
278     PJ_UNUSED_ARG(channel_count);
279     PJ_UNUSED_ARG(options);
280
281     PJ_ASSERT_RETURN(samples_per_frame >= SEGMENT_PTIME * clock_rate / 1000,
282                      PJ_ENOTSUP);
283
284     ec = PJ_POOL_ZALLOC_T(pool, struct echo_supp);
285     ec->clock_rate = clock_rate;
286     ec->samples_per_frame = (pj_uint16_t)samples_per_frame;
287     ec->samples_per_segment = (pj_uint16_t)(SEGMENT_PTIME * clock_rate / 1000);
288     ec->tail_ms = (pj_uint16_t)tail_ms;
289     ec->tail_samples = (pj_uint16_t)(tail_ms * clock_rate / 1000);
290
291     ec->templ_cnt = TEMPLATE_PTIME / SEGMENT_PTIME;
292     ec->tail_cnt = (pj_uint16_t)(tail_ms / SEGMENT_PTIME);
293     ec->play_hist_cnt = (pj_uint16_t)(ec->tail_cnt+ec->templ_cnt);
294
295     ec->max_calc = (pj_uint16_t)(MAX_CALC_DURATION_SEC * clock_rate / 
296                                  ec->samples_per_segment);
297
298     ec->rec_hist = (pj_uint16_t*) 
299                     pj_pool_alloc(pool, ec->templ_cnt *
300                                         sizeof(ec->rec_hist[0]));
301
302     /* Note: play history has twice number of elements */
303     ec->play_hist = (pj_uint16_t*) 
304                      pj_pool_alloc(pool, ec->play_hist_cnt *
305                                          sizeof(ec->play_hist[0]));
306
307     ec->corr_sum = (float*)
308                    pj_pool_alloc(pool, ec->tail_cnt * 
309                                        sizeof(ec->corr_sum[0]));
310     ec->tmp_corr = (float*)
311                    pj_pool_alloc(pool, ec->tail_cnt * 
312                                        sizeof(ec->tmp_corr[0]));
313     ec->min_factor = (float*)
314                      pj_pool_alloc(pool, ec->tail_cnt * 
315                                          sizeof(ec->min_factor[0]));
316     ec->avg_factor = (float*)
317                      pj_pool_alloc(pool, ec->tail_cnt * 
318                                          sizeof(ec->avg_factor[0]));
319     ec->tmp_factor = (float*)
320                      pj_pool_alloc(pool, ec->tail_cnt * 
321                                          sizeof(ec->tmp_factor[0]));
322     echo_supp_reset(ec);
323
324     *p_state = ec;
325     return PJ_SUCCESS;
326 }
327
328
329 /*
330  * Destroy. 
331  */
332 PJ_DEF(pj_status_t) echo_supp_destroy(void *state)
333 {
334     PJ_UNUSED_ARG(state);
335     return PJ_SUCCESS;
336 }
337
338
339 /*
340  * Hard reset
341  */
342 PJ_DEF(void) echo_supp_reset(void *state)
343 {
344     unsigned i;
345     echo_supp *ec = (echo_supp*) state;
346
347     pj_bzero(ec->rec_hist, ec->templ_cnt * sizeof(ec->rec_hist[0]));
348     pj_bzero(ec->play_hist, ec->play_hist_cnt * sizeof(ec->play_hist[0]));
349
350     for (i=0; i<ec->tail_cnt; ++i) {
351         ec->corr_sum[i] = ec->avg_factor[i] = 0;
352         ec->min_factor[i] = MAX_FLOAT;
353     }
354
355     ec->update_cnt = 0;
356     ec->calc_cnt = 0;
357     ec->learning = PJ_TRUE;
358     ec->tail_index = -1;
359     ec->best_corr = MAX_FLOAT;
360     ec->talk_state = ST_NULL;
361     ec->last_factor = 1.0;
362     ec->residue = 0;
363     ec->running_cnt = 0;
364     ec->sum_rec_level = ec->sum_play_level0 = 0;
365     ec->rec_corr = ec->play_corr0 = 0;
366 }
367
368 /*
369  * Soft reset to force the EC to re-learn without having to discard all
370  * rec and playback history.
371  */
372 PJ_DEF(void) echo_supp_soft_reset(void *state)
373 {
374     unsigned i;
375
376     echo_supp *ec = (echo_supp*) state;
377
378     for (i=0; i<ec->tail_cnt; ++i) {
379         ec->corr_sum[i] = 0;
380     }
381
382     ec->update_cnt = 0;
383     ec->calc_cnt = 0;
384     ec->learning = PJ_TRUE;
385     ec->best_corr = MAX_FLOAT;
386     ec->residue = 0;
387     ec->running_cnt = 0;
388     ec->sum_rec_level = ec->sum_play_level0 = 0;
389     ec->rec_corr = ec->play_corr0 = 0;
390
391     PJ_LOG(4,(THIS_FILE, "Echo suppressor soft reset. Re-learning.."));
392 }
393
394
395 /* Set state */
396 static void echo_supp_set_state(echo_supp *ec, talk_state_t state, 
397                                 unsigned level)
398 {
399     PJ_UNUSED_ARG(level);
400
401     if (state != ec->talk_state) {
402         TRACE_((THIS_FILE, "[%03d.%03d] %s --> %s, level=%u", 
403                            (ec->update_cnt * SEGMENT_PTIME / 1000), 
404                            ((ec->update_cnt * SEGMENT_PTIME) % 1000),
405                            state_names[ec->talk_state],
406                            state_names[state], level));
407         ec->talk_state = state;
408     }
409 }
410
411 /*
412  * Update EC state
413  */
414 static void echo_supp_update(echo_supp *ec, pj_int16_t *rec_frm,
415                              const pj_int16_t *play_frm)
416 {
417     int prev_index;
418     unsigned i, j, frm_level, sum_play_level, ulaw;
419     pj_uint16_t old_rec_frm_level, old_play_frm_level;
420     float play_corr;
421
422     ++ec->update_cnt;
423     if (ec->update_cnt > 0x7FFFFFFF)
424         ec->update_cnt = 0x7FFFFFFF; /* Detect overflow */
425
426     /* Calculate current play frame level */
427     frm_level = pjmedia_calc_avg_signal(play_frm, ec->samples_per_segment);
428     ++frm_level; /* to avoid division by zero */
429
430     /* Save the oldest frame level for later */
431     old_play_frm_level = ec->play_hist[0];
432
433     /* Push current frame level to the back of the play history */
434     pj_array_erase(ec->play_hist, sizeof(pj_uint16_t), ec->play_hist_cnt, 0);
435     ec->play_hist[ec->play_hist_cnt-1] = (pj_uint16_t) frm_level;
436
437     /* Calculate level of current mic frame */
438     frm_level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_segment);
439     ++frm_level; /* to avoid division by zero */
440
441     /* Save the oldest frame level for later */
442     old_rec_frm_level = ec->rec_hist[0];
443
444     /* Push to the back of the rec history */
445     pj_array_erase(ec->rec_hist, sizeof(pj_uint16_t), ec->templ_cnt, 0);
446     ec->rec_hist[ec->templ_cnt-1] = (pj_uint16_t) frm_level;
447
448
449     /* Can't do the calc until the play history is full. */
450     if (ec->update_cnt < ec->play_hist_cnt)
451         return;
452
453     /* Skip if learning is done */
454     if (!ec->learning)
455         return;
456
457
458     /* Calculate rec signal pattern */
459     if (ec->sum_rec_level == 0) {
460         /* Buffer has just been filled up, do full calculation */
461         ec->rec_corr = 0;
462         ec->sum_rec_level = 0;
463         for (i=0; i < ec->templ_cnt-1; ++i) {
464             float corr;
465             corr = (float)ec->rec_hist[i+1] / ec->rec_hist[i];
466             ec->rec_corr += corr;
467             ec->sum_rec_level += ec->rec_hist[i];
468         }
469         ec->sum_rec_level += ec->rec_hist[i];
470     } else {
471         /* Update from previous calculation */
472         ec->sum_rec_level = ec->sum_rec_level - old_rec_frm_level + 
473                             ec->rec_hist[ec->templ_cnt-1];
474         ec->rec_corr = ec->rec_corr - ((float)ec->rec_hist[0] / 
475                                               old_rec_frm_level) +
476                        ((float)ec->rec_hist[ec->templ_cnt-1] /
477                                ec->rec_hist[ec->templ_cnt-2]);
478     }
479
480     /* Iterate through the play history and calculate the signal correlation
481      * for every tail position in the play_hist. Save the result in temporary
482      * array since we may bail out early if the conversation state is not good
483      * to detect echo.
484      */
485     /* 
486      * First phase: do full calculation for the first position 
487      */
488     if (ec->sum_play_level0 == 0) {
489         /* Buffer has just been filled up, do full calculation */
490         sum_play_level = 0;
491         play_corr = 0;
492         for (j=0; j<ec->templ_cnt-1; ++j) {
493             float corr;
494             corr = (float)ec->play_hist[j+1] / ec->play_hist[j];
495             play_corr += corr;
496             sum_play_level += ec->play_hist[j];
497         }
498         sum_play_level += ec->play_hist[j];
499         ec->sum_play_level0 = sum_play_level;
500         ec->play_corr0 = play_corr;
501     } else {
502         /* Update from previous calculation */
503         ec->sum_play_level0 = ec->sum_play_level0 - old_play_frm_level + 
504                               ec->play_hist[ec->templ_cnt-1];
505         ec->play_corr0 = ec->play_corr0 - ((float)ec->play_hist[0] / 
506                                                   old_play_frm_level) +
507                          ((float)ec->play_hist[ec->templ_cnt-1] /
508                                  ec->play_hist[ec->templ_cnt-2]);
509         sum_play_level = ec->sum_play_level0;
510         play_corr = ec->play_corr0;
511     }
512     ec->tmp_corr[0] = FABS(play_corr - ec->rec_corr);
513     ec->tmp_factor[0] = (float)ec->sum_rec_level / sum_play_level;
514
515     /* Bail out if remote isn't talking */
516     ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
517     if (ulaw < MIN_SIGNAL_ULAW) {
518         echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
519         return;
520     }
521     /* Bail out if local user is talking */
522     if (ec->sum_rec_level >= sum_play_level) {
523         echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
524         return;
525     }
526
527     /*
528      * Second phase: do incremental calculation for the rest of positions
529      */
530     for (i=1; i < ec->tail_cnt; ++i) {
531         unsigned end;
532
533         end = i + ec->templ_cnt;
534
535         sum_play_level = sum_play_level - ec->play_hist[i-1] +
536                          ec->play_hist[end-1];
537         play_corr = play_corr - ((float)ec->play_hist[i]/ec->play_hist[i-1]) +
538                     ((float)ec->play_hist[end-1]/ec->play_hist[end-2]);
539
540         /* Bail out if remote isn't talking */
541         ulaw = pjmedia_linear2ulaw(sum_play_level/ec->templ_cnt) ^ 0xFF;
542         if (ulaw < MIN_SIGNAL_ULAW) {
543             echo_supp_set_state(ec, ST_REM_SILENT, ulaw);
544             return;
545         }
546
547         /* Bail out if local user is talking */
548         if (ec->sum_rec_level >= sum_play_level) {
549             echo_supp_set_state(ec, ST_LOCAL_TALK, ulaw);
550             return;
551         }
552
553 #if 0
554         // disabled: not a good idea if mic throws out loud echo
555         /* Also bail out if we suspect there's a doubletalk */
556         ulaw = pjmedia_linear2ulaw(ec->sum_rec_level/ec->templ_cnt) ^ 0xFF;
557         if (ulaw > MIN_SIGNAL_ULAW) {
558             echo_supp_set_state(ec, ST_DOUBLETALK, ulaw);
559             return;
560         }
561 #endif
562
563         /* Calculate correlation and save to temporary array */
564         ec->tmp_corr[i] = FABS(play_corr - ec->rec_corr);
565
566         /* Also calculate the gain factor between mic and speaker level */
567         ec->tmp_factor[i] = (float)ec->sum_rec_level / sum_play_level;
568         pj_assert(ec->tmp_factor[i] < 1);
569     }
570
571     /* We seem to have good signal, we can update the EC state */
572     echo_supp_set_state(ec, ST_REM_TALK, MIN_SIGNAL_ULAW);
573
574     /* Accummulate the correlation value to the history and at the same
575      * time find the tail index of the best correlation.
576      */
577     prev_index = ec->tail_index;
578     for (i=1; i<ec->tail_cnt-1; ++i) {
579         float *p = &ec->corr_sum[i], sum;
580
581         /* Accummulate correlation value  for this tail position */
582         ec->corr_sum[i] += ec->tmp_corr[i];
583
584         /* Update the min and avg gain factor for this tail position */
585         if (ec->tmp_factor[i] < ec->min_factor[i])
586             ec->min_factor[i] = ec->tmp_factor[i];
587         ec->avg_factor[i] = ((ec->avg_factor[i] * ec->tail_cnt) + 
588                                     ec->tmp_factor[i]) /
589                             (ec->tail_cnt + 1);
590
591         /* To get the best correlation, also include the correlation
592          * value of the neighbouring tail locations.
593          */
594         sum = *(p-1) + (*p)*2 + *(p+1);
595         //sum = *p;
596
597         /* See if we have better correlation value */
598         if (sum < ec->best_corr) {
599             ec->tail_index = i;
600             ec->best_corr = sum;
601         }
602     }
603
604     if (ec->tail_index != prev_index) {
605         unsigned duration;
606         int imin, iavg;
607
608         duration = ec->update_cnt * SEGMENT_PTIME;
609         imin = (int)(ec->min_factor[ec->tail_index] * 1000);
610         iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
611
612         PJ_LOG(4,(THIS_FILE, 
613                   "Echo suppressor updated at t=%03d.%03ds, echo tail=%d msec"
614                   ", factor min/avg=%d.%03d/%d.%03d",
615                   (duration/1000), (duration%1000),
616                   (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
617                   imin/1000, imin%1000,
618                   iavg/1000, iavg%1000));
619
620     }
621
622     ++ec->calc_cnt;
623
624     if (ec->calc_cnt > ec->max_calc) {
625         unsigned duration;
626         int imin, iavg;
627
628
629         ec->learning = PJ_FALSE;
630         ec->running_cnt = 0;
631
632         duration = ec->update_cnt * SEGMENT_PTIME;
633         imin = (int)(ec->min_factor[ec->tail_index] * 1000);
634         iavg = (int)(ec->avg_factor[ec->tail_index] * 1000);
635
636         PJ_LOG(4,(THIS_FILE, 
637                   "Echo suppressor learning done at t=%03d.%03ds, tail=%d ms"
638                   ", factor min/avg=%d.%03d/%d.%03d",
639                   (duration/1000), (duration%1000),
640                   (ec->tail_cnt-ec->tail_index) * SEGMENT_PTIME,
641                   imin/1000, imin%1000,
642                   iavg/1000, iavg%1000));
643     }
644
645 }
646
647
648 /* Amplify frame */
649 static void amplify_frame(pj_int16_t *frm, unsigned length, 
650                           pj_ufloat_t factor)
651 {
652     unsigned i;
653
654     for (i=0; i<length; ++i) {
655         frm[i] = (pj_int16_t)pj_ufloat_mul_i(frm[i], factor);
656     }
657 }
658
659 /* 
660  * Perform echo cancellation.
661  */
662 PJ_DEF(pj_status_t) echo_supp_cancel_echo( void *state,
663                                            pj_int16_t *rec_frm,
664                                            const pj_int16_t *play_frm,
665                                            unsigned options,
666                                            void *reserved )
667 {
668     unsigned i, N;
669     echo_supp *ec = (echo_supp*) state;
670
671     PJ_UNUSED_ARG(options);
672     PJ_UNUSED_ARG(reserved);
673
674     /* Calculate number of segments. This should be okay even if
675      * samples_per_frame is not a multiply of samples_per_segment, since
676      * we only calculate level.
677      */
678     N = ec->samples_per_frame / ec->samples_per_segment;
679     pj_assert(N>0);
680     for (i=0; i<N; ++i) {
681         unsigned pos = i * ec->samples_per_segment;
682         echo_supp_update(ec, rec_frm+pos, play_frm+pos);
683     }
684
685     if (ec->tail_index < 0) {
686         /* Not ready */
687     } else {
688         unsigned lookup_cnt, rec_level=0, play_level=0;
689         unsigned tail_cnt;
690         float factor;
691
692         /* How many previous segments to lookup */
693         lookup_cnt = SIGNAL_LOOKUP_MSEC / SEGMENT_PTIME;
694         if (lookup_cnt > ec->templ_cnt)
695             lookup_cnt = ec->templ_cnt;
696
697         /* Lookup in recording history to get maximum mic level, to see
698          * if local user is currently talking
699          */
700         for (i=ec->templ_cnt - lookup_cnt; i < ec->templ_cnt; ++i) {
701             if (ec->rec_hist[i] > rec_level)
702                 rec_level = ec->rec_hist[i];
703         }
704         rec_level = pjmedia_linear2ulaw(rec_level) ^ 0xFF;
705
706         /* Calculate the detected tail length, in # of segments */
707         tail_cnt = (ec->tail_cnt - ec->tail_index);
708
709         /* Lookup in playback history to get max speaker level, to see
710          * if remote user is currently talking
711          */
712         for (i=ec->play_hist_cnt -lookup_cnt -tail_cnt; 
713              i<ec->play_hist_cnt-tail_cnt; ++i) 
714         {
715             if (ec->play_hist[i] > play_level)
716                 play_level = ec->play_hist[i];
717         }
718         play_level = pjmedia_linear2ulaw(play_level) ^ 0xFF;
719
720         if (rec_level >= MIN_SIGNAL_ULAW) {
721             if (play_level < MIN_SIGNAL_ULAW) {
722                 /* Mic is talking, speaker is idle. Let mic signal pass as is.
723                  */
724                 factor = 1.0;
725                 echo_supp_set_state(ec, ST_LOCAL_TALK, rec_level);
726             } else if (rec_level > play_level) {
727                 /* Seems that both are talking. Scale the mic signal
728                  * down a little bit to reduce echo, while allowing both
729                  * parties to talk at the same time.
730                  */
731                 factor = (float)(ec->avg_factor[ec->tail_index] * 2);
732                 echo_supp_set_state(ec, ST_DOUBLETALK, rec_level);
733             } else {
734                 /* Speaker is active, but we've picked up large signal in
735                  * the microphone. Assume that this is an echo, so bring 
736                  * the level down to minimum too.
737                  */
738                 factor = ec->min_factor[ec->tail_index] / 2;
739                 echo_supp_set_state(ec, ST_REM_TALK, play_level);
740             }
741         } else {
742             if (play_level < MIN_SIGNAL_ULAW) {
743                 /* Both mic and speaker seems to be idle. Also scale the
744                  * mic signal down with average factor to reduce low power
745                  * echo.
746                  */
747                 factor = ec->avg_factor[ec->tail_index] * 3 / 2;
748                 echo_supp_set_state(ec, ST_REM_SILENT, rec_level);
749             } else {
750                 /* Mic is idle, but there's something playing in speaker.
751                  * Scale the mic down to minimum
752                  */
753                 factor = ec->min_factor[ec->tail_index] / 2;
754                 echo_supp_set_state(ec, ST_REM_TALK, play_level);
755             }
756         }
757
758         /* Smoothen the transition */
759         if (factor >= ec->last_factor)
760             factor = (factor + ec->last_factor) / 2;
761         else
762             factor = (factor + ec->last_factor*19) / 20;
763
764         /* Amplify frame */
765         amplify_frame(rec_frm, ec->samples_per_frame, 
766                       pj_ufloat_from_float(factor));
767         ec->last_factor = factor;
768
769         if (ec->talk_state == ST_REM_TALK) {
770             unsigned level, recalc_cnt;
771
772             /* Get the adjusted frame signal level */
773             level = pjmedia_calc_avg_signal(rec_frm, ec->samples_per_frame);
774             level = pjmedia_linear2ulaw(level) ^ 0xFF;
775
776             /* Accumulate average echo residue to see the ES effectiveness */
777             ec->residue = ((ec->residue * ec->running_cnt) + level) / 
778                           (ec->running_cnt + 1);
779
780             ++ec->running_cnt;
781
782             /* Check if we need to re-learn */
783             recalc_cnt = CHECK_PERIOD * ec->clock_rate / ec->samples_per_frame;
784             if (ec->running_cnt > recalc_cnt) {
785                 int iresidue;
786
787                 iresidue = (int)(ec->residue*1000);
788
789                 PJ_LOG(5,(THIS_FILE, "Echo suppressor residue = %d.%03d",
790                           iresidue/1000, iresidue%1000));
791
792                 if (ec->residue > MAX_RESIDUE && !ec->learning) {
793                     echo_supp_soft_reset(ec);
794                     ec->residue = 0;
795                 } else {
796                     ec->running_cnt = 0;
797                     ec->residue = 0;
798                 }
799             }
800         }
801     }
802
803     return PJ_SUCCESS;
804 }
805