2 * Experimental support for video sessions. We use SDL for rendering, ffmpeg
3 * as the codec library for encoding and decoding, and Video4Linux and X11
4 * to generate the local video stream.
6 * If one of these pieces is not available, either at compile time or at
7 * runtime, we do our best to run without it. Of course, no codec library
8 * means we can only deal with raw data, no SDL means we cannot do rendering,
9 * no V4L or X11 means we cannot generate data (but in principle we could
10 * stream from or record to a file).
12 * We need a recent (2007.07.12 or newer) version of ffmpeg to avoid warnings.
13 * Older versions might give 'deprecated' messages during compilation,
14 * thus not compiling in AST_DEVMODE, or may lack swscale, in which case
15 * you can try to compile #defining OLD_FFMPEG here.
20 //#define DROP_PACKETS 5 /* if set, drop this % of video packets */
21 //#define OLD_FFMPEG 1 /* set for old ffmpeg with no swscale */
24 #include <sys/ioctl.h>
25 #include "asterisk/cli.h"
26 #include "asterisk/file.h"
27 #include "asterisk/channel.h"
29 #include "console_video.h"
32 The code is structured as follows.
34 When a new console channel is created, we call console_video_start()
35 to initialize SDL, the source, and the encoder/decoder for the
36 formats in use (XXX the latter two should be done later, once the
37 codec negotiation is complete). Also, a thread is created to handle
38 the video source and generate frames.
40 While communication is on, the local source is generated by the
41 video thread, which wakes up periodically, generates frames and
42 enqueues them in chan->readq. Incoming rtp frames are passed to
43 console_write_video(), decoded and passed to SDL for display.
45 Unfortunate and confusing as it may be, we need to deal with a
46 number of different video representations (size, codec/pixel format,
47 codec parameters), as follows:
49 loc_src is the data coming from the camera/X11/etc.
50 The format is typically constrained by the video source.
52 enc_in is the input required by the encoder.
53 Typically constrained in size by the encoder type.
55 enc_out is the bitstream transmitted over RTP.
56 Typically negotiated while the call is established.
58 loc_dpy is the format used to display the local video source.
59 Depending on user preferences this can have the same size as
60 loc_src_fmt, or enc_in_fmt, or thumbnail size (e.g. PiP output)
62 dec_in is the incoming RTP bitstream. Negotiated
63 during call establishment, it is not necessarily the same as
66 dec_out the output of the decoder.
67 The format is whatever the other side sends, and the
68 buffer is allocated by avcodec_decode_... so we only
71 rem_dpy the format used to display the remote stream
73 We store the format info together with the buffer storing the data.
74 As a future optimization, a format/buffer may reference another one
75 if the formats are equivalent. This will save some unnecessary format
79 In order to handle video you need to add to sip.conf (and presumably
80 iax.conf too) the following:
84 allow=h263 ; this or other video formats
85 allow=h263p ; this or other video formats
90 * Codecs are absolutely necessary or we cannot do anything.
91 * In principle SDL is optional too (used for rendering only, but we
92 * could still source data without it), however at the moment it is required.
94 #if !defined(HAVE_VIDEO_CONSOLE) || !defined(HAVE_FFMPEG) || !defined(HAVE_SDL)
95 /* stubs if required pieces are missing */
96 int console_write_video(struct ast_channel *chan, struct ast_frame *f)
98 return 0; /* writing video not supported */
101 int console_video_cli(struct video_desc *env, const char *var, int fd)
103 return 1; /* nothing matched */
106 int console_video_config(struct video_desc **penv, const char *var, const char *val)
108 return 1; /* no configuration */
111 void console_video_start(struct video_desc *env, struct ast_channel *owner)
113 ast_log(LOG_NOTICE, "voice only, console video support not present\n");
116 void console_video_uninit(struct video_desc *env)
120 int console_video_formats = 0;
122 #else /* defined(HAVE_FFMPEG) && defined(HAVE_SDL) */
124 /*! The list of video formats we support. */
125 int console_video_formats =
126 AST_FORMAT_H263_PLUS | AST_FORMAT_H263 |
127 AST_FORMAT_MP4_VIDEO | AST_FORMAT_H264 | AST_FORMAT_H261 ;
130 #include <X11/Xlib.h> /* this should be conditional */
133 #include <ffmpeg/avcodec.h>
135 #include <ffmpeg/swscale.h> /* requires a recent ffmpeg */
139 #ifdef HAVE_SDL_IMAGE
140 #include <SDL/SDL_image.h> /* for loading images */
143 #include <SDL/SDL_ttf.h> /* render text on sdl surfaces */
147 * In many places we use buffers to store the raw frames (but not only),
148 * so here is a structure to keep all the info. data = NULL means the
149 * structure is not initialized, so the other fields are invalid.
150 * size = 0 means the buffer is not malloc'ed so we don't have to free it.
152 struct fbuf_t { /* frame buffers, dynamically allocated */
153 uint8_t *data; /* memory, malloced if size > 0, just reference
155 int size; /* total size in bytes */
156 int used; /* space used so far */
157 int ebit; /* bits to ignore at the end */
158 int x; /* origin, if necessary */
165 static void my_scale(struct fbuf_t *in, AVPicture *p_in,
166 struct fbuf_t *out, AVPicture *p_out);
168 struct video_codec_desc; /* forward declaration */
170 * Descriptor of the local source, made of the following pieces:
171 * + configuration info (geometry, device name, fps...). These are read
172 * from the config file and copied here before calling video_out_init();
173 * + the frame buffer (buf) and source pixel format, allocated at init time;
174 * + the encoding and RTP info, including timestamps to generate
175 * frames at the correct rate;
176 * + source-specific info, i.e. fd for /dev/video, dpy-image for x11, etc,
177 * filled in by video_open
178 * NOTE: loc_src.data == NULL means the rest of the struct is invalid, and
179 * the video source is not available.
181 struct video_out_desc {
182 /* video device support.
183 * videodevice and geometry are read from the config file.
184 * At the right time we try to open it and allocate a buffer.
185 * If we are successful, webcam_bufsize > 0 and we can read.
187 /* all the following is config file info copied from the parent */
188 char videodevice[64];
195 struct fbuf_t loc_src; /* local source buffer, allocated in video_open() */
196 struct fbuf_t enc_in; /* encoder input buffer, allocated in video_out_init() */
197 struct fbuf_t enc_out; /* encoder output buffer, allocated in video_out_init() */
198 struct fbuf_t loc_dpy; /* display source buffer, no buffer (managed by SDL in bmp[1]) */
200 struct video_codec_desc *enc; /* encoder */
201 void *enc_ctx; /* encoding context */
203 AVFrame *enc_in_frame; /* enc_in mapped into avcodec format. */
204 /* The initial part of AVFrame is an AVPicture */
206 struct timeval last_frame; /* when we read the last frame ? */
208 /* device specific info */
209 int fd; /* file descriptor, for webcam */
211 Display *dpy; /* x11 grabber info */
213 int screen_width; /* width of X screen */
214 int screen_height; /* height of X screen */
219 * Descriptor for the incoming stream, with a buffer for the bitstream
220 * extracted from the RTP packets, RTP reassembly info, and a frame buffer
221 * for the decoded frame (buf).
222 * The bitstream is decoded and the result is stored in a suitable frame buffer for later display.
223 * NOTE: dec_ctx == NULL means the rest is invalid (e.g. because no
224 * codec, no memory, etc.) and we must drop all incoming frames.
226 * Incoming payload is stored in one of the dec_in[] buffers, which are
227 * emptied by the video thread. These buffers are organized in a circular
228 * queue, with dec_in_cur being the buffer in use by the incoming stream,
229 * and dec_in_dpy is the one being displayed. When the pointers need to
230 * be changed, we synchronize the access to them with dec_in_lock.
231 * When the list is full dec_in_cur = NULL (we cannot store new data),
232 * when the list is empty dec_in_dpy is NULL (we cannot display frames).
234 struct video_in_desc {
235 struct video_codec_desc *dec; /* decoder */
236 AVCodecContext *dec_ctx; /* information about the codec in the stream */
237 AVCodec *codec; /* reference to the codec */
238 AVFrame *d_frame; /* place to store the decoded frame */
239 AVCodecParserContext *parser;
240 uint16_t next_seq; /* must be 16 bit */
241 int discard; /* flag for discard status */
242 #define N_DEC_IN 3 /* number of incoming buffers */
243 struct fbuf_t *dec_in_cur; /* buffer being filled in */
244 struct fbuf_t *dec_in_dpy; /* buffer to display */
245 ast_mutex_t dec_in_lock;
246 struct fbuf_t dec_in[N_DEC_IN]; /* incoming bitstream, allocated/extended in fbuf_append() */
247 struct fbuf_t dec_out; /* decoded frame, no buffer (data is in AVFrame) */
248 struct fbuf_t rem_dpy; /* display remote image, no buffer (it is in win[WIN_REMOTE].bmp) */
253 * The overall descriptor, with room for config info, video source and
254 * received data descriptors, SDL info, etc.
257 char codec_name[64]; /* the codec we use */
259 pthread_t vthread; /* video thread */
260 int shutdown; /* set to shutdown vthread */
261 struct ast_channel *owner; /* owner channel */
263 struct video_in_desc in; /* remote video descriptor */
264 struct video_out_desc out; /* local video descriptor */
266 struct gui_info *gui;
268 /* support for display. */
270 SDL_Surface *screen; /* the main window */
271 char keypad_file[256]; /* image for the keypad */
272 char keypad_font[256]; /* font for the keypad */
274 char sdl_videodriver[256];
277 static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p);
/*
 * Reset a frame buffer descriptor: the whole structure is zeroed and then
 * selected fields are restored from a copy taken on entry (pix_fmt is the
 * one visible here).
 * NOTE(review): the statement guarded by the data/size test is not visible
 * in this listing; presumably it releases b->data when the buffer was
 * malloc'ed (size > 0) -- confirm against the full source.
 */
279 static void fbuf_free(struct fbuf_t *b)
281 struct fbuf_t x = *b; /* snapshot, used below to restore fields after bzero() */
283 if (b->data && b->size)
285 bzero(b, sizeof(*b));
286 /* restore some fields */
289 b->pix_fmt = x.pix_fmt;
293 * Append a chunk of data to a buffer taking care of bit alignment
294 * Return 0 on success, != 0 on failure
296 static int fbuf_append(struct fbuf_t *b, uint8_t *src, int len,
300 * Allocate buffer. ffmpeg wants an extra FF_INPUT_BUFFER_PADDING_SIZE,
301 * and also wants 0 as a buffer terminator to prevent trouble.
303 int need = len + FF_INPUT_BUFFER_PADDING_SIZE;
307 if (b->data == NULL) {
311 b->data = ast_calloc(1, b->size);
312 } else if (b->used + need > b->size) {
/* grow to exactly what is needed for the data already stored plus this chunk */
313 b->size = b->used + need;
/* NOTE(review): if ast_realloc() follows realloc() semantics, a failure
 * here overwrites b->data with NULL and the old buffer is lost -- confirm. */
314 b->data = ast_realloc(b->data, b->size);
316 if (b->data == NULL) {
317 ast_log(LOG_WARNING, "alloc failure for %d, discard\n",
321 if (b->used == 0 && b->ebit != 0) {
322 ast_log(LOG_WARNING, "ebit not reset at start\n");
/* write position: first byte after the data already in the buffer */
325 dst = b->data + b->used;
326 i = b->ebit + sbit; /* bits to ignore around */
327 if (i == 0) { /* easy case, just append */
328 /* do everything in the common block */
329 } else if (i == 8) { /* easy too, just handle the overlap byte */
/* mask selects the low ebit bits of the last stored byte, i.e. the bits
 * that are replaced with the corresponding bits of the first new byte */
330 mask = (1 << b->ebit) - 1;
331 /* update the last byte in the buffer */
332 dst[-1] &= ~mask; /* clear bits to ignore */
333 dst[-1] |= (*src & mask); /* append new bits */
334 src += 1; /* skip and prepare for common block */
336 } else { /* must shift the new block, not done yet */
337 ast_log(LOG_WARNING, "must handle shift %d %d at %d\n",
338 b->ebit, sbit, b->used);
/* common block: copy the (remaining) payload after the existing data */
341 memcpy(dst, src, len);
344 b->data[b->used] = 0; /* padding */
349 * Build an ast_frame for a given chunk of data, and link it into
350 * the queue, with possibly 'head' bytes at the beginning to
351 * fill in some fields later.
353 static struct ast_frame *create_video_frame(uint8_t *start, uint8_t *end,
354 int format, int head, struct ast_frame *prev)
/* payload buffer: len bytes of data preceded by 'head' zeroed bytes */
360 data = ast_calloc(1, len+head);
361 f = ast_calloc(1, sizeof(*f));
362 if (f == NULL || data == NULL) {
363 ast_log(LOG_WARNING, "--- frame error f %p data %p len %d format %d\n",
364 f, data, len, format);
/* leave the first 'head' bytes free so the caller can fill them in later */
371 memcpy(data+head, start, len);
/* both the frame header and its data were heap allocated above */
373 f->mallocd = AST_MALLOCD_DATA | AST_MALLOCD_HDR;
374 //f->has_timing_info = 1;
375 //f->ts = ast_tvdiff_ms(ast_tvnow(), out->ts);
376 f->datalen = len+head;
377 f->frametype = AST_FRAME_VIDEO;
378 f->subclass = format;
382 f->delivery.tv_sec = 0;
383 f->delivery.tv_usec = 0;
/* the new frame is the tail of the list ... */
385 AST_LIST_NEXT(f, frame_list) = NULL;
/* ... and is linked after prev (NOTE(review): any NULL check on prev is on
 * a line not visible in this listing -- confirm) */
388 AST_LIST_NEXT(prev, frame_list) = f;
394 #include "console_gui.c"
396 /*------ end codec specific code -----*/
399 /* Video4Linux stuff is only used in video_open() */
400 #ifdef HAVE_VIDEODEV_H
401 #include <linux/videodev.h>
405 * Open the local video source and allocate a buffer
406 * for storing the image. Return 0 on success, -1 on error
408 static int video_open(struct video_out_desc *v)
410 struct fbuf_t *b = &v->loc_src;
411 if (b->data) /* buffer allocated means device already open */
415 * if the device is "X11", then open the x11 grabber
417 if (!strcasecmp(v->videodevice, "X11")) {
421 /* init the connection with the X server */
422 v->dpy = XOpenDisplay(NULL);
423 if (v->dpy == NULL) {
424 ast_log(LOG_WARNING, "error opening display\n");
428 /* find width and height of the screen */
429 screen_num = DefaultScreen(v->dpy);
430 v->screen_width = DisplayWidth(v->dpy, screen_num);
431 v->screen_height = DisplayHeight(v->dpy, screen_num);
433 v->image = im = XGetImage(v->dpy,
434 RootWindow(v->dpy, DefaultScreen(v->dpy)),
435 b->x, b->y, b->w, b->h, AllPlanes, ZPixmap);
436 if (v->image == NULL) {
437 ast_log(LOG_WARNING, "error creating Ximage\n");
440 switch (im->bits_per_pixel) {
442 b->pix_fmt = PIX_FMT_RGBA32;
445 b->pix_fmt = (im->green_mask == 0x7e0) ? PIX_FMT_RGB565 : PIX_FMT_RGB555;
449 ast_log(LOG_NOTICE, "image: data %p %d bpp fmt %d, mask 0x%lx 0x%lx 0x%lx\n",
453 im->red_mask, im->green_mask, im->blue_mask);
455 /* set the pointer but not the size as this is not malloc'ed */
456 b->data = (uint8_t *)im->data;
459 #ifdef HAVE_VIDEODEV_H
462 struct video_window vw = { 0 }; /* camera attributes */
463 struct video_picture vp;
465 const char *dev = v->videodevice;
467 v->fd = open(dev, O_RDONLY | O_NONBLOCK);
469 ast_log(LOG_WARNING, "error opening camera %s\n", v->videodevice);
473 i = fcntl(v->fd, F_GETFL);
474 if (-1 == fcntl(v->fd, F_SETFL, i | O_NONBLOCK)) {
475 /* non fatal, just emit a warning */
476 ast_log(LOG_WARNING, "error F_SETFL for %s [%s]\n",
477 dev, strerror(errno));
479 /* set format for the camera.
480 * In principle we could retry with a different format if the
481 * one we are asking for is not supported.
483 vw.width = v->loc_src.w;
484 vw.height = v->loc_src.h;
485 vw.flags = v->fps << 16;
486 if (ioctl(v->fd, VIDIOCSWIN, &vw) == -1) {
487 ast_log(LOG_WARNING, "error setting format for %s [%s]\n",
488 dev, strerror(errno));
491 if (ioctl(v->fd, VIDIOCGPICT, &vp) == -1) {
492 ast_log(LOG_WARNING, "error reading picture info\n");
496 "contrast %d bright %d colour %d hue %d white %d palette %d\n",
497 vp.contrast, vp.brightness,
499 vp.whiteness, vp.palette);
500 /* set the video format. Here again, we don't necessarily have to
501 * fail if the required format is not supported, but try to use
502 * what the camera gives us.
504 b->pix_fmt = vp.palette;
505 vp.palette = VIDEO_PALETTE_YUV420P;
506 if (ioctl(v->fd, VIDIOCSPICT, &vp) == -1) {
507 ast_log(LOG_WARNING, "error setting palette, using %d\n",
510 b->pix_fmt = vp.palette;
511 /* allocate the source buffer.
512 * XXX, the code here only handles yuv411, for other formats
513 * we need to look at pix_fmt and set size accordingly
515 b->size = (b->w * b->h * 3)/2; /* yuv411 */
516 ast_log(LOG_WARNING, "videodev %s opened, size %dx%d %d\n",
517 dev, b->w, b->h, b->size);
518 v->loc_src.data = ast_calloc(1, b->size);
520 ast_log(LOG_WARNING, "error allocating buffer %d bytes\n",
524 ast_log(LOG_WARNING, "success opening camera\n");
526 #endif /* HAVE_VIDEODEV_H */
528 if (v->image == NULL && v->fd < 0)
534 ast_log(LOG_WARNING, "fd %d dpy %p img %p data %p\n",
535 v->fd, v->dpy, v->image, v->loc_src.data);
536 /* XXX maybe XDestroy (v->image) ? */
538 XCloseDisplay(v->dpy);
543 fbuf_free(&v->loc_src);
547 /*! \brief complete a buffer from the local video source.
548 * Called by get_video_frames(), in turn called by the video thread.
550 static int video_read(struct video_out_desc *v)
552 struct timeval now = ast_tvnow();
553 struct fbuf_t *b = &v->loc_src;
555 if (b->data == NULL) /* not initialized */
558 /* check if it is time to read */
559 if (ast_tvzero(v->last_frame))
561 if (ast_tvdiff_ms(now, v->last_frame) < 1000/v->fps)
562 return 0; /* too early */
563 v->last_frame = now; /* XXX actually, should correct for drift */
567 /* read frame from X11 */
570 RootWindow(v->dpy, DefaultScreen(v->dpy)),
571 b->x, b->y, b->w, b->h, AllPlanes, ZPixmap, v->image, 0, 0);
573 b->data = (uint8_t *)v->image->data;
575 return p.linesize[0] * b->h;
578 if (v->fd < 0) /* no other source */
581 int r, l = v->loc_src.size - v->loc_src.used;
582 r = read(v->fd, v->loc_src.data + v->loc_src.used, l);
583 // ast_log(LOG_WARNING, "read %d of %d bytes from webcam\n", r, l);
584 if (r < 0) /* read error */
586 if (r == 0) /* no data */
588 v->loc_src.used += r;
590 v->loc_src.used = 0; /* prepare for next frame */
591 return v->loc_src.size;
596 /* Helper function to process incoming video.
597 * For each incoming video call invoke ffmpeg_init() to initialize
598 * the decoding structure; then incoming video frames are processed
599 * by write_video(), which in turn calls pre_process_data() to extract
600 * the bitstream and accumulates data into a buffer within video_desc. When
601 * a frame is complete (determined by the marker bit in the RTP header)
602 * call decode_video() to decode it and, if that is successful, call show_frame()
603 * to display the frame.
607 * Map the codec name to the library. If not recognised, use a default.
608 * This is useful in the output path where we decide by name, presumably.
610 static struct video_codec_desc *map_config_video_format(char *name)
/* case-insensitive search of the NULL-terminated supported_codecs[] table */
614 for (i = 0; supported_codecs[i]; i++)
615 if (!strcasecmp(name, supported_codecs[i]->name))
/* reaching the NULL terminator means no entry matched */
617 if (supported_codecs[i] == NULL) {
618 ast_log(LOG_WARNING, "Cannot find codec for '%s'\n", name);
/* The chosen codec name is written back into the caller's buffer.
 * NOTE(review): the copy is unbounded; assumes 'name' is large enough for
 * any supported codec name -- confirm against the callers. */
620 strcpy(name, supported_codecs[i]->name);
622 ast_log(LOG_WARNING, "Using codec '%s'\n", name);
623 return supported_codecs[i];
626 /*! \brief uninitialize the descriptor for remote video stream */
/*
 * Always returns -1, which lets error paths in video_in_init() release
 * everything and report failure in one statement:
 * return video_in_uninit(v);
 */
627 static int video_in_uninit(struct video_in_desc *v)
632 av_parser_close(v->parser);
636 avcodec_close(v->dec_ctx);
644 v->codec = NULL; /* only a reference */
645 v->dec = NULL; /* forget the decoder */
646 v->discard = 1; /* start in discard mode */
/* reset every incoming bitstream buffer, then the decoded/display descriptors */
647 for (i = 0; i < N_DEC_IN; i++)
648 fbuf_free(&v->dec_in[i]);
649 fbuf_free(&v->dec_out);
650 fbuf_free(&v->rem_dpy);
651 return -1; /* error, in case someone cares */
655 * initialize ffmpeg resources used for decoding frames from the network.
657 static int video_in_init(struct video_in_desc *v, uint32_t format)
661 /* XXX should check that these are already set */
668 codec = map_video_format(format, CM_RD);
670 v->codec = avcodec_find_decoder(codec);
672 ast_log(LOG_WARNING, "Unable to find the decoder for format %d\n", codec);
673 return video_in_uninit(v);
676 * Initialize the codec context.
678 v->dec_ctx = avcodec_alloc_context();
679 /* XXX call dec_init() ? */
680 if (avcodec_open(v->dec_ctx, v->codec) < 0) {
681 ast_log(LOG_WARNING, "Cannot open the codec context\n");
684 return video_in_uninit(v);
687 v->parser = av_parser_init(codec);
689 ast_log(LOG_WARNING, "Cannot initialize the decoder parser\n");
690 return video_in_uninit(v);
693 v->d_frame = avcodec_alloc_frame();
695 ast_log(LOG_WARNING, "Cannot allocate decoding video frame\n");
696 return video_in_uninit(v);
701 /*! \brief uninitialize the descriptor for local video stream */
702 static int video_out_uninit(struct video_out_desc *v)
704 /* XXX this should be a codec callback */
706 AVCodecContext *enc_ctx = (AVCodecContext *)v->enc_ctx;
707 avcodec_close(enc_ctx);
711 if (v->enc_in_frame) {
712 av_free(v->enc_in_frame);
713 v->enc_in_frame = NULL;
715 v->codec = NULL; /* only a reference */
717 fbuf_free(&v->loc_src);
718 fbuf_free(&v->enc_in);
719 fbuf_free(&v->enc_out);
720 fbuf_free(&v->loc_dpy);
721 if (v->image) { /* X11 grabber */
722 XCloseDisplay(v->dpy);
734 * Initialize the encoder for the local source:
735 * - AVCodecContext, AVCodec, AVFrame are used by ffmpeg for encoding;
736 * - encbuf is used to store the encoded frame (to be sent)
737 * - mtu is used to determine the max size of video fragment
738 * NOTE: we enter here with the video source already open.
740 static int video_out_init(struct video_desc *env)
744 struct fbuf_t *enc_in;
745 struct video_out_desc *v = &env->out;
749 v->enc_in_frame = NULL;
750 v->enc_out.data = NULL;
752 if (v->loc_src.data == NULL) {
753 ast_log(LOG_WARNING, "No local source active\n");
754 return video_out_uninit(v);
756 codec = map_video_format(v->enc->format, CM_WR);
757 v->codec = avcodec_find_encoder(codec);
759 ast_log(LOG_WARNING, "Cannot find the encoder for format %d\n",
761 return video_out_uninit(v);
764 v->mtu = 1400; /* set it early so the encoder can use it */
766 /* allocate the input buffer for encoding.
767 * ffmpeg only supports PIX_FMT_YUV420P for the encoding.
770 enc_in->pix_fmt = PIX_FMT_YUV420P;
771 enc_in->size = (enc_in->w * enc_in->h * 3)/2;
772 enc_in->data = ast_calloc(1, enc_in->size);
774 ast_log(LOG_WARNING, "Cannot allocate encoder input buffer\n");
775 return video_out_uninit(v);
777 /* construct an AVFrame that points into buf_in */
778 v->enc_in_frame = avcodec_alloc_frame();
779 if (!v->enc_in_frame) {
780 ast_log(LOG_WARNING, "Unable to allocate the encoding video frame\n");
781 return video_out_uninit(v);
784 /* parameters for PIX_FMT_YUV420P */
785 size = enc_in->w * enc_in->h;
786 v->enc_in_frame->data[0] = enc_in->data;
787 v->enc_in_frame->data[1] = v->enc_in_frame->data[0] + size;
788 v->enc_in_frame->data[2] = v->enc_in_frame->data[1] + size/4;
789 v->enc_in_frame->linesize[0] = enc_in->w;
790 v->enc_in_frame->linesize[1] = enc_in->w/2;
791 v->enc_in_frame->linesize[2] = enc_in->w/2;
793 /* now setup the parameters for the encoder.
794 * XXX should be codec-specific
797 AVCodecContext *enc_ctx = avcodec_alloc_context();
798 v->enc_ctx = enc_ctx;
799 enc_ctx->pix_fmt = enc_in->pix_fmt;
800 enc_ctx->width = enc_in->w;
801 enc_ctx->height = enc_in->h;
802 /* XXX rtp_callback ?
803 * rtp_mode so ffmpeg inserts as many start codes as possible.
805 enc_ctx->rtp_mode = 1;
806 enc_ctx->rtp_payload_size = v->mtu / 2; // mtu/2
807 enc_ctx->bit_rate = v->bitrate;
808 enc_ctx->bit_rate_tolerance = enc_ctx->bit_rate/2;
809 enc_ctx->qmin = v->qmin; /* should be configured */
810 enc_ctx->time_base = (AVRational){1, v->fps};
811 enc_ctx->gop_size = v->fps*5; // emit I frame every 5 seconds
813 v->enc->enc_init(v->enc_ctx);
815 if (avcodec_open(enc_ctx, v->codec) < 0) {
816 ast_log(LOG_WARNING, "Unable to initialize the encoder %d\n",
820 return video_out_uninit(v);
824 * Allocate enough for the encoded bitstream. As we are compressing,
825 * we hope that the output is never larger than the input size.
827 v->enc_out.data = ast_calloc(1, enc_in->size);
828 v->enc_out.size = enc_in->size;
834 /*! \brief uninitialize the entire environment.
835 * In practice, signal the thread and give it a bit of time to
836 * complete, giving up if it gets stuck. Because uninit
837 * is called from hangup with the channel locked, and the thread
838 * uses the chan lock, we need to unlock here. This is unsafe,
839 * and we should really use refcounts for the channels.
841 void console_video_uninit(struct video_desc *env)
843 int i, t = 100; /* initial wait is shorter, then make it longer */
/* Poll at most 10 times while env->shutdown is still set, dropping the
 * owner's channel lock before each wait and re-taking it afterwards.
 * The wait itself (presumably using t) is on lines not visible in this
 * listing. */
845 for (i=0; env->shutdown && i < 10; i++) {
846 ast_channel_unlock(env->owner);
849 ast_channel_lock(env->owner);
854 /*! fill an AVPicture from our fbuf info, as it is required by
855 * the image conversion routines in ffmpeg.
856 * XXX This depends on the format.
858 static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p)
860 /* provide defaults for commonly used formats */
861 int l4 = b->w * b->h/4; /* size of U or V frame */
862 int len = b->w; /* Y linesize, bytes */
863 int luv = b->w/2; /* U/V linesize, bytes */
865 bzero(p, sizeof(*p));
/* per-format adjustments of len/luv; only the YUYV422 case is visible in
 * this listing */
866 switch (b->pix_fmt) {
876 case PIX_FMT_YUYV422: /* Packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr */
877 len *= 2; /* all data in first plane, probably */
881 p->data[0] = b->data;
882 p->linesize[0] = len;
883 /* these are only valid for component images */
/* With a non-zero chroma linesize, plane 1 starts 4*l4 bytes in (after the
 * w*h luma bytes) and plane 2 one further chroma plane (l4 bytes) on;
 * otherwise both point just past the first line. */
884 p->data[1] = luv ? b->data + 4*l4 : b->data+len;
885 p->data[2] = luv ? b->data + 5*l4 : b->data+len;
886 p->linesize[1] = luv;
887 p->linesize[2] = luv;
891 /*! convert/scale between an input and an output format.
892 * Old versions of ffmpeg only have img_convert, which does not rescale.
893 * New versions use sws_scale which does both.
895 static void my_scale(struct fbuf_t *in, AVPicture *p_in,
896 struct fbuf_t *out, AVPicture *p_out)
898 AVPicture my_p_in, my_p_out;
/* Build AVPicture descriptors from the fbuf geometry using local storage.
 * NOTE(review): the conditions guarding these two calls are not visible in
 * this listing; presumably they run only when the caller passed NULL, as
 * get_video_frames() does -- confirm. */
901 p_in = fill_pict(in, &my_p_in);
903 p_out = fill_pict(out, &my_p_out);
906 /* XXX img_convert is deprecated, and does not do rescaling */
907 img_convert(p_out, out->pix_fmt,
908 p_in, in->pix_fmt, in->w, in->h);
909 #else /* XXX replacement */
911 struct SwsContext *convert_ctx;
/* a conversion context is created and released on every call */
913 convert_ctx = sws_getContext(in->w, in->h, in->pix_fmt,
914 out->w, out->h, out->pix_fmt,
915 SWS_BICUBIC, NULL, NULL, NULL);
916 if (convert_ctx == NULL) {
/* NOTE(review): unlike the other log calls in this file, this message has
 * no trailing newline */
917 ast_log(LOG_ERROR, "FFMPEG::convert_cmodel : swscale context initialization failed");
921 ast_log(LOG_WARNING, "in %d %dx%d out %d %dx%d\n",
922 in->pix_fmt, in->w, in->h, out->pix_fmt, out->w, out->h);
/* NOTE(review): the libswscale documentation describes the two integer
 * arguments of sws_scale() as the first row of the source slice and the
 * slice height; passing in->w as the first row looks suspicious -- confirm. */
923 sws_scale(convert_ctx,
924 p_in->data, p_in->linesize,
925 in->w, in->h, /* src slice */
926 p_out->data, p_out->linesize);
928 sws_freeContext(convert_ctx);
930 #endif /* XXX replacement */
933 struct video_desc *get_video_desc(struct ast_channel *c);
936 * This function is called (by asterisk) for each video packet
937 * coming from the network (the 'in' path) that needs to be processed.
938 * We need to reconstruct the entire video frame before we can decode it.
939 * After a video packet is received we have to:
940 * - extract the bitstream with pre_process_data()
941 * - append the bitstream to a buffer
942 * - if the fragment is the last (RTP Marker) we decode it with decode_video()
943 * - after the decoding is completed we display the decoded frame with show_frame()
945 int console_write_video(struct ast_channel *chan, struct ast_frame *f);
946 int console_write_video(struct ast_channel *chan, struct ast_frame *f)
948 struct video_desc *env = get_video_desc(chan);
949 struct video_in_desc *v = &env->in;
951 if (v->dec == NULL) { /* try to get the codec */
952 v->dec = map_video_codec(f->subclass & ~1);
953 if (v->dec == NULL) {
954 ast_log(LOG_WARNING, "cannot find video codec, drop input 0x%x\n", f->subclass);
957 if (video_in_init(v, v->dec->format)) {
958 /* This is not fatal, but we won't have incoming video */
959 ast_log(LOG_WARNING, "Cannot initialize input decoder\n");
964 if (v->dec_ctx == NULL) {
965 ast_log(LOG_WARNING, "cannot decode, dropping frame\n");
966 return 0; /* error */
969 if (v->dec_in_cur == NULL) /* no buffer for incoming frames, drop */
971 #if defined(DROP_PACKETS) && DROP_PACKETS > 0
972 /* Simulate lost packets */
973 if ((random() % 10000) <= 100*DROP_PACKETS) {
974 ast_log(LOG_NOTICE, "Packet lost [%d]\n", f->seqno);
980 * In discard mode, drop packets until we find one with
981 * the RTP marker set (which is the end of frame).
982 * Note that the RTP marker flag is sent as the LSB of the
983 * subclass, which is a bitmask of formats. The low bit is
984 * normally used for audio so there is no interference.
986 if (f->subclass & 0x01) {
987 v->dec_in_cur->used = 0;
988 v->dec_in_cur->ebit = 0;
989 v->next_seq = f->seqno + 1; /* wrap at 16 bit */
991 ast_log(LOG_WARNING, "out of discard mode, frame %d\n", f->seqno);
997 * Only in-order fragments will be accepted. Remember seqno
998 * is 16 bits wide, so it wraps around. Also, ideally we could
999 * accept a bit of reordering, but at the moment we don't.
1001 if (v->next_seq != f->seqno) {
1002 ast_log(LOG_WARNING, "discarding frame out of order, %d %d\n",
1003 v->next_seq, f->seqno);
1009 if (f->data == NULL || f->datalen < 2) {
1010 ast_log(LOG_WARNING, "empty video frame, discard\n");
1013 if (v->dec->dec_decap(v->dec_in_cur, f->data, f->datalen)) {
1014 ast_log(LOG_WARNING, "error in dec_decap, enter discard\n");
1017 if (f->subclass & 0x01) { // RTP Marker
1018 /* prepare to decode: advance the buffer so the video thread knows. */
1019 struct fbuf_t *tmp = v->dec_in_cur; /* store current pointer */
1020 ast_mutex_lock(&v->dec_in_lock);
1021 if (++v->dec_in_cur == &v->dec_in[N_DEC_IN]) /* advance to next, circular */
1022 v->dec_in_cur = &v->dec_in[0];
1023 if (v->dec_in_dpy == NULL) { /* were not displaying anything, so set it */
1024 v->dec_in_dpy = tmp;
1025 } else if (v->dec_in_dpy == v->dec_in_cur) { /* current slot is busy */
1026 v->dec_in_cur = NULL;
1028 ast_mutex_unlock(&v->dec_in_lock);
1034 /*! \brief read a frame from webcam or X11 through video_read(),
1035 * display it, then encode and split it.
1036 * Return a list of ast_frame representing the video fragments.
1037 * The head pointer is returned by the function, the tail pointer
1038 * is returned as an argument.
1040 static struct ast_frame *get_video_frames(struct video_desc *env, struct ast_frame **tail)
1042 struct video_out_desc *v = &env->out;
1043 struct ast_frame *dummy;
1045 if (!v->loc_src.data) {
1046 static volatile int a = 0;
1048 ast_log(LOG_WARNING, "fail, no loc_src buffer\n");
1052 return NULL; /* can happen, e.g. we are reading too early */
1057 /* Scale the video for the encoder, then use it for local rendering
1058 * so we will see the same as the remote party.
1060 my_scale(&v->loc_src, NULL, &v->enc_in, NULL);
1061 show_frame(env, WIN_LOCAL);
1064 if (v->enc_out.data == NULL) {
1065 static volatile int a = 0;
1067 ast_log(LOG_WARNING, "fail, no encbuf\n");
1071 return v->enc->enc_encap(v, tail);
/*
 * video_thread(): entry point of the helper thread; arg is the
 * struct video_desc of the session (the thread is created in
 * console_video_start() with env as its argument).
 *
 * Visible steps: optionally export SDL_VIDEODRIVER (hiding DISPLAY while
 * SDL is initialized when the driver is "aalib-console"), initialize SDL,
 * open the local video source and register its fd as fd slot 1 of the
 * owner channel, then: refresh the window caption, sleep 50 ms, display
 * the pending decoder input buffers, and append the locally generated
 * frames to the readq of the owner channel, signalling the alert pipe.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (for example the declarations of count and buf, the loop
 * header, the else branches and the closing braces), so the notes below
 * cover only the statements that are shown.
 */
1075 * Helper thread to periodically poll the video source and enqueue the
1076 * generated frames to the channel's queue.
1077 * Using a separate thread also helps because the encoding can be
1078 * computationally expensive so we don't want to starve the main thread.
1080 static void *video_thread(void *arg)
1082 struct video_desc *env = arg;
1084 char save_display[128] = "";
1088 /* if sdl_videodriver is set, override the environment. Also,
1089 * if it contains 'console' override DISPLAY around the call to SDL_Init
1090 * so we use the console as opposed to the x11 version of aalib
1092 if (!ast_strlen_zero(env->sdl_videodriver)) { /* override */
1093 const char *s = getenv("DISPLAY");
1094 setenv("SDL_VIDEODRIVER", env->sdl_videodriver, 1);
1095 if (s && !strcasecmp(env->sdl_videodriver, "aalib-console")) {
1096 ast_copy_string(save_display, s, sizeof(save_display));
1097 unsetenv("DISPLAY");
/* a non-zero result from SDL_Init() is only logged; the thread carries on */
1100 if (SDL_Init(SDL_INIT_VIDEO)) {
1101 ast_log(LOG_WARNING, "Could not initialize SDL - %s\n",
1103 /* again not fatal, just we won't display anything */
1107 ast_mutex_init(&env->in.dec_in_lock);
/* put DISPLAY back if it was saved and unset above */
1109 if (!ast_strlen_zero(save_display))
1110 setenv("DISPLAY", save_display, 1);
1112 if (video_open(&env->out)) {
1113 ast_log(LOG_WARNING, "cannot open local video source\n");
1115 /* try to register the fd. Unfortunately, if the webcam
1116 * driver does not support select/poll we are out of luck.
1118 if (env->out.fd >= 0)
1119 ast_channel_set_fd(env->owner, 1, env->out.fd);
1120 video_out_init(env);
1124 /* XXX 20 times/sec */
/* 50000 us timeout handed to ast_select() below */
1125 struct timeval t = { 0, 50000 };
1126 struct ast_frame *p, *f;
1127 struct video_in_desc *v = &env->in;
1128 struct ast_channel *chan = env->owner;
/* alertpipe[1] is the end written to further down to signal new frames */
1129 int fd = chan->alertpipe[1];
1131 /* determine if video format changed */
/* the caption is rebuilt once every 10 passes */
1132 if (count++ % 10 == 0) {
/*
 * NOTE(review): sprintf() is unbounded and the size of buf is not visible
 * here; snprintf() with the buffer size would be safer because
 * videodevice and codec_name come from the configuration.
 */
1134 if (env->out.sendvideo)
1135 sprintf(buf, "%s %s %dx%d @@ %dfps %dkbps",
1136 env->out.videodevice, env->codec_name,
1137 env->out.enc_in.w, env->out.enc_in.h,
1138 env->out.fps, env->out.bitrate/1000);
1140 sprintf(buf, "hold");
1141 SDL_WM_SetCaption(buf, NULL);
1144 /* manage keypad events */
1145 /* XXX here we should always check for events,
1146 * otherwise the drag will not work */
1150 /* sleep for a while */
1151 ast_select(0, NULL, NULL, NULL, &t);
1154 SDL_UpdateRects(env->screen, 1, &env->gui->win[WIN_KEYPAD].rect);// XXX inefficient
1156 * While there is something to display, call the decoder and free
1157 * the buffer, possibly enabling the receiver to store new data.
1159 while (v->dec_in_dpy) {
1160 struct fbuf_t *tmp = v->dec_in_dpy; /* store current pointer */
/* the remote window is redrawn only when dec_run() returns non-zero */
1162 if (v->dec->dec_run(v, tmp))
1163 show_frame(env, WIN_REMOTE);
1164 tmp->used = 0; /* mark buffer as free */
/* dec_in_dpy and dec_in_cur are updated only while holding dec_in_lock */
1166 ast_mutex_lock(&v->dec_in_lock);
1167 if (++v->dec_in_dpy == &v->dec_in[N_DEC_IN]) /* advance to next, circular */
1168 v->dec_in_dpy = &v->dec_in[0];
1170 if (v->dec_in_cur == NULL) /* receiver was idle, enable it... */
1171 v->dec_in_cur = tmp; /* using the slot just freed */
1172 else if (v->dec_in_dpy == v->dec_in_cur) /* this was the last slot */
1173 v->dec_in_dpy = NULL; /* nothing more to display */
1174 ast_mutex_unlock(&v->dec_in_lock);
/*
 * f is the head of the newly generated frame list. p is stored as the new
 * readq tail below, so presumably get_video_frames() sets it to the last
 * frame of that list - TODO confirm against get_video_frames().
 */
1178 f = get_video_frames(env, &p); /* read and display */
1184 ast_channel_lock(chan);
1186 /* AST_LIST_INSERT_TAIL is only good for one frame, cannot use here */
1187 if (chan->readq.first == NULL) {
1188 chan->readq.first = f;
1190 chan->readq.last->frame_list.next = f;
1192 chan->readq.last = p;
1194 * more or less same as ast_queue_frame, but extra
1195 * write on the alertpipe to signal frames.
/* one alert-pipe write per frame just queued; p is reused as the cursor */
1198 int blah = 1, l = sizeof(blah);
1199 for (p = f; p; p = AST_LIST_NEXT(p, frame_list)) {
/*
 * NOTE(review): the warning reports the type of f (the list head) on
 * every pass rather than the frame p being signalled.
 */
1200 if (write(fd, &blah, l) != l)
1201 ast_log(LOG_WARNING, "Unable to write to alert pipe on %s, frametype/subclass %d/%d: %s!\n",
1202 chan->name, f->frametype, f->subclass, strerror(errno));
1205 ast_channel_unlock(chan);
1207 /* thread terminating, here could call the uninit */
1208 /* uninitialize the local and remote video environments */
1209 video_in_uninit(&env->in);
1210 video_out_uninit(&env->out);
/*
 * copy_geometry(): helper taking a source and a destination buffer
 * descriptor. init_env() calls it as copy_geometry(src, dst) so that dst
 * "inherits" its geometry from src.
 *
 * NOTE(review): the body is not visible in this listing, so the exact
 * rule (for example whether only unset fields of dst are filled in)
 * cannot be confirmed from here.
 */
1219 static void copy_geometry(struct fbuf_t *src, struct fbuf_t *dst)
/*
 * init_env(): set the pixel format of the local source, encoder input,
 * local display and remote display buffers of env to PIX_FMT_YUV420P,
 * then propagate geometry between them with copy_geometry().
 *
 * NOTE(review): the statements inside the branch taken when the encoder
 * input width or height is 0 are not visible in this listing; presumably
 * a default size is assigned there - TODO confirm.
 */
1227 /*! initialize the video environment.
1228 * Apart from the formats (constant) used by sdl and the codec,
1229 * we use enc_in as the basic geometry.
1231 static void init_env(struct video_desc *env)
1233 struct fbuf_t *c = &(env->out.loc_src); /* local source */
1234 struct fbuf_t *ei = &(env->out.enc_in); /* encoder input */
1235 struct fbuf_t *ld = &(env->out.loc_dpy); /* local display */
1236 struct fbuf_t *rd = &(env->in.rem_dpy); /* remote display */
1238 c->pix_fmt = PIX_FMT_YUV420P; /* default - camera format */
1239 ei->pix_fmt = PIX_FMT_YUV420P; /* encoder input */
/* encoder input geometry not configured (branch body not shown here) */
1240 if (ei->w == 0 || ei->h == 0) {
1244 ld->pix_fmt = rd->pix_fmt = PIX_FMT_YUV420P; /* sdl format */
1245 /* inherit defaults */
/* order matters: rd is derived from ei before ld is derived from rd */
1246 copy_geometry(ei, c); /* camera inherits from encoder input */
1247 copy_geometry(ei, rd); /* remote display inherits from encoder input */
1248 copy_geometry(rd, ld); /* local display inherits from remote display */
/*
 * console_video_start(): start video handling for a console channel.
 *   env   - video descriptor; checked against NULL ("video not initialized")
 *   owner - channel that owns the session; checked against NULL
 *
 * Visible steps: look up the encoder for env->codec_name, register the
 * ffmpeg codecs and restrict ffmpeg logging to errors, force fps and
 * bitrate to defaults when they are 0, then create the background thread
 * running video_thread() with env as its argument.
 *
 * NOTE(review): the statements following the two NULL checks, the lines
 * between them and the encoder lookup, and the fps default value are not
 * visible in this listing.
 */
1252 * The first call to the video code, called by oss_new() or similar.
1253 * Here we initialize the various components we use, namely SDL for display,
1254 * ffmpeg for encoding/decoding, and a local video source.
1255 * We do our best to progress even if some of the components are not
1258 void console_video_start(struct video_desc *env, struct ast_channel *owner)
1260 if (env == NULL) /* video not initialized */
1262 if (owner == NULL) /* nothing to do if we don't have a channel */
/* select the encoder descriptor from the configured codec name */
1266 env->out.enc = map_config_video_format(env->codec_name);
/* NOTE(review): informational message, yet logged at LOG_WARNING */
1268 ast_log(LOG_WARNING, "start video out %s %dx%d\n",
1269 env->codec_name, env->out.enc_in.w, env->out.enc_in.h);
1271 * Register all codecs supported by the ffmpeg library.
1272 * We only need to do it once, but probably doesn't
1273 * harm to do it multiple times.
1276 avcodec_register_all();
1277 av_log_set_level(AV_LOG_ERROR); /* only report errors */
/* zero means unset for both fps and bitrate */
1279 if (env->out.fps == 0) {
1281 ast_log(LOG_WARNING, "fps unset, forcing to %d\n", env->out.fps);
1283 if (env->out.bitrate == 0) {
1284 env->out.bitrate = 65000;
1285 ast_log(LOG_WARNING, "bitrate unset, forcing to %d\n", env->out.bitrate);
/* NOTE(review): the result of the thread creation call is not checked */
1288 ast_pthread_create_background(&env->vthread, NULL, video_thread, env);
/*
 * video_geom(): fill b->w and b->h from the string s.
 *   b - buffer descriptor receiving the geometry
 *   s - a format name from formats[] (compared case-insensitively), a
 *       numeric WxH, or a numeric WxH prefixed by < or > to select a
 *       neighbouring entry of formats[]
 * When s matches nothing and is not a valid WxH, a warning is logged
 * whose text says 352x288 is used.
 *
 * NOTE(review): the declarations of w and h, the table entries before
 * "qvga", the table terminator, the size comparisons inside the loop and
 * the return statement are not visible in this listing.
 */
1292 * Parse a geometry string, accepting also common names for the formats.
1293 * Trick: if we have a leading > or < and a numeric geometry,
1294 * return the larger or smaller one.
1295 * E.g. <352x288 gives the smaller one, 320x240
1297 static int video_geom(struct fbuf_t *b, const char *s)
1302 const char *s; int w; int h;
1303 } *fp, formats[] = {
1306 {"qvga", 320, 240 },
1307 {"qcif", 176, 144 },
1308 {"sqcif", 128, 96 },
/*
 * NOTE(review): the sscanf() result is ignored here, so a malformed
 * string after < or > leaves w and h at whatever they held before.
 */
1311 if (*s == '<' || *s == '>')
1312 sscanf(s+1,"%dx%d", &w, &h);
/* the scan stops at the entry whose s member is NULL */
1313 for (fp = formats; fp->s; fp++) {
1314 if (*s == '>') { /* look for a larger one */
1317 fp--; /* back one step if possible */
1320 } else if (*s == '<') { /* look for a smaller one */
1323 } else if (!strcasecmp(s, fp->s)) { /* look for a string */
1327 if (*s == '<' && fp->s == NULL) /* smallest */
/* no table entry selected: try to read s as a plain WxH */
1332 } else if (sscanf(s, "%dx%d", &b->w, &b->h) != 2) {
1333 ast_log(LOG_WARNING, "Invalid video_size %s, using 352x288\n", s);
1340 /* extend ast_cli with video commands. Called by console_video_config */
1341 int console_video_cli(struct video_desc *env, const char *var, int fd)
1344 return 1; /* unrecognised */
1346 if (!strcasecmp(var, "videodevice")) {
1347 ast_cli(fd, "videodevice is [%s]\n", env->out.videodevice);
1348 } else if (!strcasecmp(var, "videocodec")) {
1349 ast_cli(fd, "videocodec is [%s]\n", env->codec_name);
1350 } else if (!strcasecmp(var, "sendvideo")) {
1351 ast_cli(fd, "sendvideo is [%s]\n", env->out.sendvideo ? "on" : "off");
1352 } else if (!strcasecmp(var, "video_size")) {
1353 ast_cli(fd, "sizes: video %dx%d camera %dx%d local %dx%d remote %dx%d in %dx%d\n",
1354 env->out.enc_in.w, env->out.enc_in.h,
1355 env->out.loc_src.w, env->out.loc_src.h,
1356 env->out.loc_dpy.w, env->out.loc_src.h,
1357 env->in.rem_dpy.w, env->in.rem_dpy.h,
1358 env->in.dec_out.w, env->in.dec_out.h);
1359 } else if (!strcasecmp(var, "bitrate")) {
1360 ast_cli(fd, "bitrate is [%d]\n", env->out.bitrate);
1361 } else if (!strcasecmp(var, "qmin")) {
1362 ast_cli(fd, "qmin is [%d]\n", env->out.qmin);
1363 } else if (!strcasecmp(var, "fps")) {
1364 ast_cli(fd, "fps is [%d]\n", env->out.fps);
1366 return 1; /* unrecognised */
1368 return 0; /* recognised */
/*
 * console_video_config(): handle one var/val configuration pair.
 *   penv - address of the video descriptor pointer; the descriptor is
 *          allocated with ast_calloc() the first time we get here and
 *          given defaults (videodevice "X11", bitrate 65000, sendvideo 1)
 *   var  - name of the option
 *   val  - value of the option
 * Returns 1 on error or when nothing matched, 0 when var was handled.
 *
 * The option names are matched through the CV_* macros, which are defined
 * outside this listing; the geometry options go through video_geom().
 *
 * NOTE(review): some lines are not visible in this listing (the checks
 * guarding the two early error returns, the line opening the CV_* table
 * and possibly further defaults), so this describes only what is shown.
 */
1371 /*! parse config command for video support. */
1372 int console_video_config(struct video_desc **penv,
1373 const char *var, const char *val)
1375 struct video_desc *env;
1378 ast_log(LOG_WARNING, "bad argument penv=NULL\n");
1379 return 1; /* error */
1381 /* allocate the video descriptor first time we get here */
1384 env = *penv = ast_calloc(1, sizeof(struct video_desc));
1386 ast_log(LOG_WARNING, "fail to allocate video_desc\n");
1387 return 1; /* error */
1390 /* set default values */
1391 ast_copy_string(env->out.videodevice, "X11", sizeof(env->out.videodevice));
1393 env->out.bitrate = 65000;
1394 env->out.sendvideo = 1;
1398 CV_STR("videodevice", env->out.videodevice);
1399 CV_BOOL("sendvideo", env->out.sendvideo);
/* the four size options share video_geom(), each with its own target buffer */
1400 CV_F("video_size", video_geom(&env->out.enc_in, val));
1401 CV_F("camera_size", video_geom(&env->out.loc_src, val));
1402 CV_F("local_size", video_geom(&env->out.loc_dpy, val));
1403 CV_F("remote_size", video_geom(&env->in.rem_dpy, val));
1404 CV_STR("keypad", env->keypad_file);
/*
 * NOTE(review): env->gui is handed to keypad_cfg_read() but nothing
 * visible here sets it - confirm it is valid on the first call.
 */
1405 CV_F("region", keypad_cfg_read(env->gui, val));
1406 CV_STR("keypad_font", env->keypad_font);
1407 CV_STR("sdl_videodriver", env->sdl_videodriver);
1408 CV_UINT("fps", env->out.fps);
1409 CV_UINT("bitrate", env->out.bitrate);
1410 CV_UINT("qmin", env->out.qmin);
1411 CV_STR("videocodec", env->codec_name);
/* reached when none of the entries above matched var (per the comment below) */
1412 return 1; /* nothing found */
1414 CV_END; /* the 'nothing found' case */
1415 return 0; /* found something */
1418 #endif /* video support */