encode h264 video using ffmpeg library memory issues - c

I'm trying to do screen capture on OS X using ffmpeg's avfoundation library. I capture frames from the screen and encode it using H264 into an flv container.
Here's the command line output of the program:
Input #0, avfoundation, from 'Capture screen 0':
Duration: N/A, start: 9.253649, bitrate: N/A
Stream #0:0: Video: rawvideo (UYVY / 0x59565955), uyvy422, 1440x900, 14.58 tbr, 1000k tbn, 1000k tbc
raw video is inCodec
FLV (Flash Video)http://localhost:8090/test.flv
[libx264 # 0x102038e00] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX
[libx264 # 0x102038e00] profile High, level 4.0
[libx264 # 0x102038e00] 264 - core 142 r2495 6a301b6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2014 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=6 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=1 weightp=2 keyint=50 keyint_min=5 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=abr mbtree=1 bitrate=400 ratetol=1.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
[tcp # 0x101a5fe70] Connection to tcp://localhost:8090 failed (Connection refused), trying next address
[tcp # 0x101a5fe70] Connection to tcp://localhost:8090 failed: Connection refused
url_fopen failed: Operation now in progress
[flv # 0x102038800] Using AVStream.codec.time_base as a timebase hint to the muxer is deprecated. Set AVStream.time_base instead.
encoded frame #0
encoded frame #1
......
encoded frame #49
encoded frame #50
testmee(8404,0x7fff7e05c300) malloc: *** error for object 0x102053e08: incorrect checksum for freed object - object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
(lldb) bt
* thread #10: tid = 0x43873, 0x00007fff95639286 libsystem_kernel.dylib`__pthread_kill + 10, stop reason = signal SIGABRT
* frame #0: 0x00007fff95639286 libsystem_kernel.dylib`__pthread_kill + 10
frame #1: 0x00007fff9623742f libsystem_pthread.dylib`pthread_kill + 90
frame #2: 0x00007fff977ceb53 libsystem_c.dylib`abort + 129
frame #3: 0x00007fff9ab59e06 libsystem_malloc.dylib`szone_error + 625
frame #4: 0x00007fff9ab4f799 libsystem_malloc.dylib`small_malloc_from_free_list + 1105
frame #5: 0x00007fff9ab4d3bc libsystem_malloc.dylib`szone_malloc_should_clear + 1449
frame #6: 0x00007fff9ab4c877 libsystem_malloc.dylib`malloc_zone_malloc + 71
frame #7: 0x00007fff9ab4b395 libsystem_malloc.dylib`malloc + 42
frame #8: 0x00007fff94aa63d2 IOSurface`IOSurfaceClientLookupFromMachPort + 40
frame #9: 0x00007fff94aa6b38 IOSurface`IOSurfaceLookupFromMachPort + 12
frame #10: 0x00007fff92bfa6b2 CoreGraphics`_CGYDisplayStreamFrameAvailable + 342
frame #11: 0x00007fff92f6759c CoreGraphics`CGYDisplayStreamNotification_server + 336
frame #12: 0x00007fff92bfada6 CoreGraphics`display_stream_runloop_callout + 46
frame #13: 0x00007fff956eba07 CoreFoundation`__CFMachPortPerform + 247
frame #14: 0x00007fff956eb8f9 CoreFoundation`__CFRUNLOOP_IS_CALLING_OUT_TO_A_SOURCE1_PERFORM_FUNCTION__ + 41
frame #15: 0x00007fff956eb86b CoreFoundation`__CFRunLoopDoSource1 + 475
frame #16: 0x00007fff956dd3e7 CoreFoundation`__CFRunLoopRun + 2375
frame #17: 0x00007fff956dc858 CoreFoundation`CFRunLoopRunSpecific + 296
frame #18: 0x00007fff95792ef1 CoreFoundation`CFRunLoopRun + 97
frame #19: 0x0000000105f79ff1 CMIOUnits`___lldb_unnamed_function2148$$CMIOUnits + 875
frame #20: 0x0000000105f6f2c2 CMIOUnits`___lldb_unnamed_function2127$$CMIOUnits + 14
frame #21: 0x00007fff97051765 CoreMedia`figThreadMain + 417
frame #22: 0x00007fff96235268 libsystem_pthread.dylib`_pthread_body + 131
frame #23: 0x00007fff962351e5 libsystem_pthread.dylib`_pthread_start + 176
frame #24: 0x00007fff9623341d libsystem_pthread.dylib`thread_start + 13
I've attached the code I used below.
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavdevice/avdevice.h>
#include <libavutil/opt.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
/* compile using
gcc -g -o stream test.c -lavformat -lavutil -lavcodec -lavdevice -lswscale
*/
// void show_av_device() {
// inFmt->get_device_list(inFmtCtx, device_list);
// printf("Device Info=============\n");
// //avformat_open_input(&inFmtCtx,"video=Capture screen 0",inFmt,&inOptions);
// printf("===============================\n");
// }
void AVFAIL (int code, const char *what) {
char msg[500];
av_strerror(code, msg, sizeof(msg));
fprintf(stderr, "failed: %s\nerror: %s\n", what, msg);
exit(2);
}
#define AVCHECK(f) do { int e = (f); if (e < 0) AVFAIL(e, #f); } while (0)
#define AVCHECKPTR(p,f) do { p = (f); if (!p) AVFAIL(AVERROR_UNKNOWN, #f); } while (0)
void registerLibs() {
av_register_all();
avdevice_register_all();
avformat_network_init();
avcodec_register_all();
}
int main(int argc, char *argv[]) {
//conversion variables
struct SwsContext *swsCtx = NULL;
//input stream variables
AVFormatContext *inFmtCtx = NULL;
AVCodecContext *inCodecCtx = NULL;
AVCodec *inCodec = NULL;
AVInputFormat *inFmt = NULL;
AVFrame *inFrame = NULL;
AVDictionary *inOptions = NULL;
const char *streamURL = "http://localhost:8090/test.flv";
const char *name = "avfoundation";
// AVFrame *inFrameYUV = NULL;
AVPacket inPacket;
//output stream variables
AVCodecContext *outCodecCtx = NULL;
AVCodec *outCodec;
AVFormatContext *outFmtCtx = NULL;
AVOutputFormat *outFmt = NULL;
AVFrame *outFrameYUV = NULL;
AVStream *stream = NULL;
int i, videostream, ret;
int numBytes, frameFinished;
registerLibs();
inFmtCtx = avformat_alloc_context(); //alloc input context
av_dict_set(&inOptions, "pixel_format", "uyvy422", 0);
av_dict_set(&inOptions, "probesize", "7000000", 0);
inFmt = av_find_input_format(name);
ret = avformat_open_input(&inFmtCtx, "Capture screen 0:", inFmt, &inOptions);
if (ret < 0) {
printf("Could not load the context for the input device\n");
return -1;
}
if (avformat_find_stream_info(inFmtCtx, NULL) < 0) {
printf("Could not find stream info for screen\n");
return -1;
}
av_dump_format(inFmtCtx, 0, "Capture screen 0", 0);
// inFmtCtx->streams is an array of pointers of size inFmtCtx->nb_stream
videostream = av_find_best_stream(inFmtCtx, AVMEDIA_TYPE_VIDEO, -1, -1, &inCodec, 0);
if (videostream == -1) {
printf("no video stream found\n");
return -1;
} else {
printf("%s is inCodec\n", inCodec->long_name);
}
inCodecCtx = inFmtCtx->streams[videostream]->codec;
// open codec
if (avcodec_open2(inCodecCtx, inCodec, NULL) > 0) {
printf("Couldn't open codec");
return -1; // couldn't open codec
}
//setup output params
outFmt = av_guess_format(NULL, streamURL, NULL);
if(outFmt == NULL) {
printf("output format was not guessed properly");
return -1;
}
if((outFmtCtx = avformat_alloc_context()) < 0) {
printf("output context not allocated. ERROR");
return -1;
}
printf("%s", outFmt->long_name);
outFmtCtx->oformat = outFmt;
snprintf(outFmtCtx->filename, sizeof(outFmtCtx->filename), streamURL);
printf("%s\n", outFmtCtx->filename);
outCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
if(!outCodec) {
printf("could not find encoder for H264 \n" );
return -1;
}
stream = avformat_new_stream(outFmtCtx, outCodec);
outCodecCtx = stream->codec;
avcodec_get_context_defaults3(outCodecCtx, outCodec);
outCodecCtx->codec_id = AV_CODEC_ID_H264;
outCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
outCodecCtx->flags = CODEC_FLAG_GLOBAL_HEADER;
outCodecCtx->width = inCodecCtx->width;
outCodecCtx->height = inCodecCtx->height;
outCodecCtx->time_base.den = 25;
outCodecCtx->time_base.num = 1;
outCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
outCodecCtx->gop_size = 50;
outCodecCtx->bit_rate = 400000;
//setup output encoders etc
if(stream) {
ret = avcodec_open2(outCodecCtx, outCodec, NULL);
if (ret < 0) {
printf("Could not open output encoder");
return -1;
}
}
if (avio_open(&outFmtCtx->pb, outFmtCtx->filename, AVIO_FLAG_WRITE ) < 0) {
perror("url_fopen failed");
}
avio_open_dyn_buf(&outFmtCtx->pb);
ret = avformat_write_header(outFmtCtx, NULL);
if (ret != 0) {
printf("was not able to write header to output format");
return -1;
}
unsigned char *pb_buffer;
int len = avio_close_dyn_buf(outFmtCtx->pb, (unsigned char **)(&pb_buffer));
avio_write(outFmtCtx->pb, (unsigned char *)pb_buffer, len);
numBytes = avpicture_get_size(PIX_FMT_UYVY422, inCodecCtx->width, inCodecCtx->height);
// Allocate video frame
inFrame = av_frame_alloc();
swsCtx = sws_getContext(inCodecCtx->width, inCodecCtx->height, inCodecCtx->pix_fmt, inCodecCtx->width,
inCodecCtx->height, PIX_FMT_YUV420P, SWS_BILINEAR, NULL, NULL, NULL);
int frame_count = 0;
while(av_read_frame(inFmtCtx, &inPacket) >= 0) {
if(inPacket.stream_index == videostream) {
avcodec_decode_video2(inCodecCtx, inFrame, &frameFinished, &inPacket);
// 1 Frame might need more than 1 packet to be filled
if(frameFinished) {
outFrameYUV = av_frame_alloc();
uint8_t *buffer = (uint8_t *)av_malloc(numBytes);
int ret = avpicture_fill((AVPicture *)outFrameYUV, buffer, PIX_FMT_YUV420P,
inCodecCtx->width, inCodecCtx->height);
if(ret < 0){
printf("%d is return val for fill\n", ret);
return -1;
}
//convert image to YUV
sws_scale(swsCtx, (uint8_t const * const* )inFrame->data,
inFrame->linesize, 0, inCodecCtx->height,
outFrameYUV->data, outFrameYUV->linesize);
//outFrameYUV now holds the YUV scaled frame/picture
outFrameYUV->format = outCodecCtx->pix_fmt;
outFrameYUV->width = outCodecCtx->width;
outFrameYUV->height = outCodecCtx->height;
AVPacket pkt;
int got_output;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
outFrameYUV->pts = frame_count;
ret = avcodec_encode_video2(outCodecCtx, &pkt, outFrameYUV, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
return -1;
}
if(got_output) {
if(stream->codec->coded_frame->key_frame) {
pkt.flags |= AV_PKT_FLAG_KEY;
}
pkt.stream_index = stream->index;
if(pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, stream->codec->time_base, stream->time_base);
if(pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, stream->codec->time_base, stream->time_base);
if(avio_open_dyn_buf(&outFmtCtx->pb)!= 0) {
printf("ERROR: Unable to open dynamic buffer\n");
}
ret = av_interleaved_write_frame(outFmtCtx, &pkt);
unsigned char *pb_buffer;
int len = avio_close_dyn_buf(outFmtCtx->pb, (unsigned char **)&pb_buffer);
avio_write(outFmtCtx->pb, (unsigned char *)pb_buffer, len);
} else {
ret = 0;
}
if(ret != 0) {
fprintf(stderr, "Error while writing video frame: %s\n", av_err2str(ret));
exit(1);
}
fprintf(stderr, "encoded frame #%d\n", frame_count);
frame_count++;
av_free_packet(&pkt);
av_frame_free(&outFrameYUV);
av_free(buffer);
}
}
av_free_packet(&inPacket);
}
av_write_trailer(outFmtCtx);
//close video stream
if(stream) {
avcodec_close(outCodecCtx);
}
for (i = 0; i < outFmtCtx->nb_streams; i++) {
av_freep(&outFmtCtx->streams[i]->codec);
av_freep(&outFmtCtx->streams[i]);
}
if (!(outFmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_close(outFmtCtx->pb);
/* free the output format context */
avformat_free_context(outFmtCtx);
// Free the YUV frame populated by the decoder
av_free(inFrame);
// Close the video codec (decoder)
avcodec_close(inCodecCtx);
// Close the input video file
avformat_close_input(&inFmtCtx);
return 1;
}
I'm not sure what I've done wrong here. But, what I've observed is that for each frame thats been encoded, my memory usage goes up by about 6MB. Backtracking afterward usually leads one of the following two culprits:
avf_read_frame function in avfoundation.m
av_dup_packet function in avpacket.h
Can I also get advice on the way I'm using avio_open_dyn_buff function to be able to stream over http? I've also attached my ffmpeg library versions below:
ffmpeg version N-70876-g294bb6c Copyright (c) 2000-2015 the FFmpeg developers
built with Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)
configuration: --prefix=/usr/local --enable-gpl --enable-postproc --enable-pthreads --enable-libmp3lame --enable-libtheora --enable-libx264 --enable-libvorbis --disable-mmx --disable-ssse3 --disable-armv5te --disable-armv6 --disable-neon --enable-shared --disable-static --disable-stripping
libavutil 54. 20.100 / 54. 20.100
libavcodec 56. 29.100 / 56. 29.100
libavformat 56. 26.101 / 56. 26.101
libavdevice 56. 4.100 / 56. 4.100
libavfilter 5. 13.101 / 5. 13.101
libswscale 3. 1.101 / 3. 1.101
libswresample 1. 1.100 / 1. 1.100
libpostproc 53. 3.100 / 53. 3.100
Hyper fast Audio and Video encoder
Valgrind analysis attached here because I exceeded stack overflow's character limit. http://pastebin.com/MPeRhjhN

Your code contains the following allocation:
uint8_t *buffer = (uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
But has no matching av_free(). That's probably where you're losing 6MB/frame. av_free(outFrameYUV) won't free the memory inside the struct, just the struct itself. More correct would be to use av_frame_unref() instead of these individual av_free()s.
I also see you're calling this in your loop:
av_free(inFrame);
But the allocation for that frame is done outside your loop, so after the first run, it would presumably be dead. You want to allocate and free the frame both inside the loop. And for unreferencing, use av_frame_unref() instead of av_free().
I'd recommend running your program using asan or valgrind to detect more such issues, it'll keep track of what memory is accessed when and if incorrect, where it was free'ed.

Related

FFMPEG libx265 encoding leaves memory unfreed after avcodec_free_context

I am working on H265 encoding software and, in my unit tests, I have some weird memory leaks. To found them, I have modified the encode_video.c example from FFMPEG documentation. I have changed the resolution to correspond at a 4K video, I have adapted the bitrate and I have added a pause before context allocation and another one before the final return :
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libavcodec/avcodec.h>
#include <libavutil/opt.h>
#include <libavutil/imgutils.h>
static void encode(AVCodecContext *enc_ctx, AVFrame *frame, AVPacket *pkt,
FILE *outfile)
{
int ret;
/* send the frame to the encoder */
if (frame)
printf("Send frame %3"PRId64"\n", frame->pts);
ret = avcodec_send_frame(enc_ctx, frame);
if (ret < 0) {
fprintf(stderr, "Error sending a frame for encoding\n");
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_packet(enc_ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during encoding\n");
exit(1);
}
printf("Write packet %3"PRId64" (size=%5d)\n", pkt->pts, pkt->size);
fwrite(pkt->data, 1, pkt->size, outfile);
av_packet_unref(pkt);
}
}
int main(int argc, char **argv)
{
const char *filename, *codec_name;
const AVCodec *codec;
AVCodecContext *c= NULL;
int i, ret, x, y;
FILE *f;
AVFrame *frame;
AVPacket *pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
if (argc <= 2) {
fprintf(stderr, "Usage: %s <output file> <codec name>\n", argv[0]);
exit(0);
}
filename = argv[1];
codec_name = argv[2];
sleep(10);
/* find the mpeg1video encoder */
codec = avcodec_find_encoder_by_name(codec_name);
if (!codec) {
fprintf(stderr, "Codec '%s' not found\n", codec_name);
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
pkt = av_packet_alloc();
if (!pkt)
exit(1);
/* put sample parameters */
c->bit_rate = 1000000;
/* resolution must be a multiple of two */
c->width = 3840;
c->height = 2160;
/* frames per second */
c->time_base = (AVRational){1, 25};
c->framerate = (AVRational){25, 1};
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec->id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
ret = avcodec_open2(c, codec, NULL);
if (ret < 0) {
fprintf(stderr, "Could not open codec: %s\n", av_err2str(ret));
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Could not allocate the video frame data\n");
exit(1);
}
/* encode 1 second of video */
for (i = 0; i < 25; i++) {
fflush(stdout);
/* make sure the frame data is writable */
ret = av_frame_make_writable(frame);
if (ret < 0)
exit(1);
/* prepare a dummy image */
/* Y */
for (y = 0; y < c->height; y++) {
for (x = 0; x < c->width; x++) {
frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
}
}
/* Cb and Cr */
for (y = 0; y < c->height/2; y++) {
for (x = 0; x < c->width/2; x++) {
frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
}
}
frame->pts = i;
/* encode the image */
encode(c, frame, pkt, f);
}
/* flush the encoder */
encode(c, NULL, pkt, f);
/* add sequence end code to have a real MPEG file */
if (codec->id == AV_CODEC_ID_MPEG1VIDEO || codec->id == AV_CODEC_ID_MPEG2VIDEO)
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);
sleep(10);
return 0;
}
I was expecting that the RAM memory usage at the first pause is the same as the second pause but there is about 55 Mo of difference. If I increase the number of encoded frames, this difference up to 390 Mo. I have tested this code under Linux Mint LMDE 4 (roughly same as Debian 10).
I guess this memory "leak" it isn't a real memory leak but that it's some internal values used by libx265 to be maybe reused for another encoding. But has there a way to free this memory through FFMPEG API?

FFMPEG RTSP Server using muxing doc example

I am trying to develop RTSP server using FFMPEG. For that I slightly modified muxing file located at doc/example/ folder inside FFMPEG repository.
Giving my source code of RTSP server example:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <libavutil/avassert.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
#define STREAM_DURATION 10.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */
#define SCALE_FLAGS SWS_BICUBIC
// a wrapper around a single output AVStream
typedef struct OutputStream {
AVStream *st;
AVCodecContext *enc;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmp_frame;
float t, tincr, tincr2;
struct SwsContext *sws_ctx;
struct SwrContext *swr_ctx;
} OutputStream;
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
static int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
/* Write the compressed frame to the media file. */
log_packet(fmt_ctx, pkt);
return av_interleaved_write_frame(fmt_ctx, pkt);
}
/* Add an output stream. */
static void add_stream(OutputStream *ost, AVFormatContext *oc,
AVCodec **codec,
enum AVCodecID codec_id)
{
AVCodecContext *c;
int i;
/* find the encoder */
*codec = avcodec_find_encoder(codec_id);
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams-1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
switch ((*codec)->type) {
case AVMEDIA_TYPE_AUDIO:
c->sample_fmt = (*codec)->sample_fmts ?
(*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
c->bit_rate = 64000;
c->sample_rate = 44100;
if ((*codec)->supported_samplerates) {
c->sample_rate = (*codec)->supported_samplerates[0];
for (i = 0; (*codec)->supported_samplerates[i]; i++) {
if ((*codec)->supported_samplerates[i] == 44100)
c->sample_rate = 44100;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->channel_layout = AV_CH_LAYOUT_STEREO;
if ((*codec)->channel_layouts) {
c->channel_layout = (*codec)->channel_layouts[0];
for (i = 0; (*codec)->channel_layouts[i]; i++) {
if ((*codec)->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
c->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
ost->st->time_base = (AVRational){ 1, c->sample_rate };
break;
case AVMEDIA_TYPE_VIDEO:
c->codec_id = codec_id;
c->bit_rate = 400000;
/* Resolution must be a multiple of two. */
c->width = 352;
c->height = 288;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
break;
default:
break;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
/**************************************************************/
/* audio output */
static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
uint64_t channel_layout,
int sample_rate, int nb_samples)
{
AVFrame *frame = av_frame_alloc();
int ret;
if (!frame) {
fprintf(stderr, "Error allocating an audio frame\n");
exit(1);
}
frame->format = sample_fmt;
frame->channel_layout = channel_layout;
frame->sample_rate = sample_rate;
frame->nb_samples = nb_samples;
if (nb_samples) {
ret = av_frame_get_buffer(frame, 0);
if (ret < 0) {
fprintf(stderr, "Error allocating an audio buffer\n");
exit(1);
}
}
return frame;
}
static void open_audio(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
AVCodecContext *c;
int nb_samples;
int ret;
AVDictionary *opt = NULL;
c = ost->enc;
/* open it */
av_dict_copy(&opt, opt_arg, 0);
ret = avcodec_open2(c, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
exit(1);
}
/* init signal generator */
ost->t = 0;
ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
/* increment frequency by 110 Hz per second */
ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
nb_samples = 10000;
else
nb_samples = c->frame_size;
ost->frame = alloc_audio_frame(c->sample_fmt, c->channel_layout,
c->sample_rate, nb_samples);
ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, c->channel_layout,
c->sample_rate, nb_samples);
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
/* create resampler context */
ost->swr_ctx = swr_alloc();
if (!ost->swr_ctx) {
fprintf(stderr, "Could not allocate resampler context\n");
exit(1);
}
/* set options */
av_opt_set_int (ost->swr_ctx, "in_channel_count", c->channels, 0);
av_opt_set_int (ost->swr_ctx, "in_sample_rate", c->sample_rate, 0);
av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_int (ost->swr_ctx, "out_channel_count", c->channels, 0);
av_opt_set_int (ost->swr_ctx, "out_sample_rate", c->sample_rate, 0);
av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt", c->sample_fmt, 0);
/* initialize the resampling context */
if ((ret = swr_init(ost->swr_ctx)) < 0) {
fprintf(stderr, "Failed to initialize the resampling context\n");
exit(1);
}
}
/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
* 'nb_channels' channels. */
static AVFrame *get_audio_frame(OutputStream *ost)
{
AVFrame *frame = ost->tmp_frame;
int j, i, v;
int16_t *q = (int16_t*)frame->data[0];
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, ost->enc->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
for (j = 0; j <frame->nb_samples; j++) {
v = (int)(sin(ost->t) * 10000);
for (i = 0; i < ost->enc->channels; i++)
*q++ = v;
ost->t += ost->tincr;
ost->tincr += ost->tincr2;
}
frame->pts = ost->next_pts;
ost->next_pts += frame->nb_samples;
return frame;
}
/*
* encode one audio frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
{
AVCodecContext *c;
AVPacket pkt = { 0 }; // data and size must be 0;
AVFrame *frame;
int ret;
int got_packet;
int dst_nb_samples;
av_init_packet(&pkt);
c = ost->enc;
frame = get_audio_frame(ost);
if (frame) {
/* convert samples from native format to destination codec format, using the resampler */
/* compute destination number of samples */
dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
c->sample_rate, c->sample_rate, AV_ROUND_UP);
av_assert0(dst_nb_samples == frame->nb_samples);
/* when we pass a frame to the encoder, it may keep a reference to it
* internally;
* make sure we do not overwrite it here
*/
ret = av_frame_make_writable(ost->frame);
if (ret < 0)
exit(1);
/* convert to destination format */
ret = swr_convert(ost->swr_ctx,
ost->frame->data, dst_nb_samples,
(const uint8_t **)frame->data, frame->nb_samples);
if (ret < 0) {
fprintf(stderr, "Error while converting\n");
exit(1);
}
frame = ost->frame;
frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
ost->samples_count += dst_nb_samples;
}
ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
exit(1);
}
if (got_packet) {
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
if (ret < 0) {
fprintf(stderr, "Error while writing audio frame: %s\n",
av_err2str(ret));
exit(1);
}
}
return (frame || got_packet) ? 0 : 1;
}
/**************************************************************/
/* video output */
static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
{
AVFrame *picture;
int ret;
picture = av_frame_alloc();
if (!picture)
return NULL;
picture->format = pix_fmt;
picture->width = width;
picture->height = height;
/* allocate the buffers for the frame data */
ret = av_frame_get_buffer(picture, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate frame data.\n");
exit(1);
}
return picture;
}
static void open_video(AVFormatContext *oc, AVCodec *codec, OutputStream *ost, AVDictionary *opt_arg)
{
int ret;
AVCodecContext *c = ost->enc;
AVDictionary *opt = NULL;
av_dict_copy(&opt, opt_arg, 0);
/* open the codec */
ret = avcodec_open2(c, codec, &opt);
av_dict_free(&opt);
if (ret < 0) {
fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
exit(1);
}
/* allocate and init a re-usable frame */
ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
if (!ost->frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
/* If the output format is not YUV420P, then a temporary YUV420P
* picture is needed too. It is then converted to the required
* output format. */
ost->tmp_frame = NULL;
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
if (!ost->tmp_frame) {
fprintf(stderr, "Could not allocate temporary picture\n");
exit(1);
}
}
/* copy the stream parameters to the muxer */
ret = avcodec_parameters_from_context(ost->st->codecpar, c);
if (ret < 0) {
fprintf(stderr, "Could not copy the stream parameters\n");
exit(1);
}
}
/* Prepare a dummy image. */
static void fill_yuv_image(AVFrame *pict, int frame_index,
int width, int height)
{
int x, y, i;
i = frame_index;
/* Y */
for (y = 0; y < height; y++)
for (x = 0; x < width; x++)
pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
/* Cb and Cr */
for (y = 0; y < height / 2; y++) {
for (x = 0; x < width / 2; x++) {
pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
}
}
}
static AVFrame *get_video_frame(OutputStream *ost)
{
AVCodecContext *c = ost->enc;
/* check if we want to generate more frames */
if (av_compare_ts(ost->next_pts, c->time_base,
STREAM_DURATION, (AVRational){ 1, 1 }) >= 0)
return NULL;
/* when we pass a frame to the encoder, it may keep a reference to it
* internally; make sure we do not overwrite it here */
if (av_frame_make_writable(ost->frame) < 0)
exit(1);
if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
/* as we only generate a YUV420P picture, we must convert it
* to the codec pixel format if needed */
if (!ost->sws_ctx) {
ost->sws_ctx = sws_getContext(c->width, c->height,
AV_PIX_FMT_YUV420P,
c->width, c->height,
c->pix_fmt,
SCALE_FLAGS, NULL, NULL, NULL);
if (!ost->sws_ctx) {
fprintf(stderr,
"Could not initialize the conversion context\n");
exit(1);
}
}
fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
sws_scale(ost->sws_ctx,
(const uint8_t * const *)ost->tmp_frame->data, ost->tmp_frame->linesize,
0, c->height, ost->frame->data, ost->frame->linesize);
} else {
fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
}
ost->frame->pts = ost->next_pts++;
return ost->frame;
}
/*
* encode one video frame and send it to the muxer
* return 1 when encoding is finished, 0 otherwise
*/
static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
{
int ret;
AVCodecContext *c;
AVFrame *frame;
int got_packet = 0;
AVPacket pkt = { 0 };
c = ost->enc;
frame = get_video_frame(ost);
av_init_packet(&pkt);
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding video frame: %s\n", av_err2str(ret));
exit(1);
}
if (got_packet) {
ret = write_frame(oc, &c->time_base, ost->st, &pkt);
} else {
ret = 0;
}
if (ret < 0) {
fprintf(stderr, "Error while writing video frame: %s\n", av_err2str(ret));
exit(1);
}
return (frame || got_packet) ? 0 : 1;
}
static void close_stream(AVFormatContext *oc, OutputStream *ost)
{
avcodec_free_context(&ost->enc);
av_frame_free(&ost->frame);
av_frame_free(&ost->tmp_frame);
sws_freeContext(ost->sws_ctx);
swr_free(&ost->swr_ctx);
}
/**************************************************************/
/* media file output */
int main(int argc, char **argv)
{
OutputStream video_st = { 0 }, audio_st = { 0 };
const char *filename;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVCodec *audio_codec, *video_codec;
int ret;
int have_video = 0, have_audio = 0;
int encode_video = 0, encode_audio = 0;
AVDictionary *opt = NULL;
int i;
/* Initialize libavcodec, and register all codecs and formats. */
av_register_all();
avformat_network_init();
if (argc < 2) {
printf("usage: %s output_file\n"
"API example program to output a media file with libavformat.\n"
"This program generates a synthetic audio and video stream, encodes and\n"
"muxes them into a file named output_file.\n"
"The output format is automatically guessed according to the file extension.\n"
"Raw images can also be output by using '%%d' in the filename.\n"
"\n", argv[0]);
return 1;
}
filename = argv[1];
for (i = 2; i+1 < argc; i+=2) {
if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
}
/* allocate the output media context */
avformat_alloc_output_context2(&oc, NULL, "rtsp", filename);
if (!oc) {
printf("Could not deduce output format from file extension: using MPEG.\n");
avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
}
if (!oc)
return 1;
fmt = oc->oformat;
/* Add the audio and video streams using the default format codecs
* and initialize the codecs. */
if (fmt->video_codec != AV_CODEC_ID_NONE) {
add_stream(&video_st, oc, &video_codec, fmt->video_codec);
have_video = 1;
encode_video = 1;
}
if (fmt->audio_codec != AV_CODEC_ID_NONE) {
add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
have_audio = 1;
encode_audio = 1;
}
/* Now that all the parameters are set, we can open the audio and
* video codecs and allocate the necessary encode buffers. */
if (have_video)
open_video(oc, video_codec, &video_st, opt);
if (have_audio)
open_audio(oc, audio_codec, &audio_st, opt);
av_dump_format(oc, 0, filename, 1);
/* open the output file, if needed */
if (!(fmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open '%s': %s\n", filename,
av_err2str(ret));
return 1;
}
}
/* Write the stream header, if any. */
ret = avformat_write_header(oc, &opt);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file: %s\n",
av_err2str(ret));
return 1;
}
while (encode_video || encode_audio) {
/* select the stream to encode */
if (encode_video &&
(!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
encode_video = !write_video_frame(oc, &video_st);
} else {
encode_audio = !write_audio_frame(oc, &audio_st);
}
}
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(oc);
/* Close each codec. */
if (have_video)
close_stream(oc, &video_st);
if (have_audio)
close_stream(oc, &audio_st);
if (!(fmt->flags & AVFMT_NOFILE))
/* Close the output file. */
avio_closep(&oc->pb);
/* free the stream */
avformat_free_context(oc);
return 0;
}
After compiling it, I am running binary:
$ ./muxing rtsp://127.0.0.1/test
Output #0, rtsp, to 'rtsp://127.0.0.1/test':
Stream #0:0: Video: mpeg4, yuv420p, 352x288, q=2-31, 400 kb/s, 25 tbn
Stream #0:1: Audio: aac (LC), 44100 Hz, stereo, fltp, 64 kb/s
[tcp # 0x2b9d220] Connection to tcp://127.0.0.1:554?timeout=0 failed: Connection refused
Error occurred when opening output file: Connection refused
But getting Connection refused error,
I created this repository for rtsp server using ffserver code:
https://github.com/harshil1991/ffserver.git
Now I can able to integrate this source code in my existing repo.

libavcodec : how to encode with h264 codec ,with mp4 container using controllable frame rate and bitrate(through c code)

I am trying to record the screen of a pc and encode the recorded frames using h264 encoder
and wrap them into a mp4 container.I want to do this because this super user link https://superuser.com/questions/300897/what-is-a-codec-e-g-divx-and-how-does-it-differ-from-a-file-format-e-g-mp/300997#300997 suggests it allows good trade-off between size and quality of the output file.
The application I am working on should allow users to record a few hours of video and have the minimum output file size with decent quality.
The code I have cooked up so far allows me to record and save .mpg(container) files with the mpeg1video encoder
Running:
ffmpeg -i test.mpg
on the output file gives the following output:
[mpegvideo # 028c7400] Estimating duration from bitrate, this may be inaccurate
Input #0, mpegvideo, from 'test.mpg':
Duration: 00:00:00.29, bitrate: 104857 kb/s
Stream #0:0: Video: mpeg1video, yuv420p(tv), 1366x768 [SAR 1:1 DAR 683:384], 104857 kb/s, 25 fps, 25 tbr, 1200k tbn, 25 tbc
I have these settings for my output:
const char * filename="test.mpg";
int codec_id= AV_CODEC_ID_MPEG1VIDEO;
AVCodec *codec11;
AVCodecContext *outContext= NULL;
int got_output;
FILE *f;
AVPacket pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
/* put sample parameters */
outContext->bit_rate = 400000;
/* resolution must be a multiple of two */
outContext->width=pCodecCtx->width;
outContext->height=pCodecCtx->height;
/* frames per second */
outContext->time_base.num=1;
outContext->time_base.den=25;
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
outContext->gop_size = 10;
outContext->max_b_frames = 1;
outContext->pix_fmt = AV_PIX_FMT_YUV420P;
When I change int codec_id= AV_CODEC_ID_MPEG1VIDEO to int codec_id= AV_CODEC_ID_H264 i get a file that does not play with vlc.
I have read that writing the
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
array at the end of your file when finished encoding makes your file a legitimate mpeg file.It is written like this:
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
in my code. Should I do the same thing when I change my encoder to AV_CODEC_ID_H264?
I am capturing using gdi input like this:
AVDictionary* options = NULL;
//Set some options
//grabbing frame rate
av_dict_set(&options,"framerate","30",0);
AVInputFormat *ifmt=av_find_input_format("gdigrab");
if(avformat_open_input(&pFormatCtx,"desktop",ifmt,&options)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
I want to be able to modify my grabbing rate to optimize for the outptut file size
but When I change it to 20 for example I get a video that plays so fast.How do
I get a video that plays with normal speed with frames captured at 20 fps or any
lower frame rate value?
While recording I get the following output on the standard error output:
[gdigrab # 00cdb8e0] Capturing whole desktop as 1366x768x32 at (0,0)
Input #0, gdigrab, from '(null)':
Duration: N/A, start: 1420718663.655713, bitrate: 1006131 kb/s
Stream #0:0: Video: bmp, bgra, 1366x768, 1006131 kb/s, 29.97 tbr, 1000k tbn, 29.97 tbc
[swscaler # 00d24120] Warning: data is not aligned! This can lead to a speedloss
[mpeg1video # 00cdd160] AVFrame.format is not set
[mpeg1video # 00cdd160] AVFrame.width or height is not set
[mpeg1video # 00cdd160] AVFrame.format is not set
[mpeg1video # 00cdd160] AVFrame.width or height is not set
[mpeg1video # 00cdd160] AVFrame.format is not set
How do I get rid of this error in my code?
In summary:
1) How do I encode h264 video wrapped into mp4 container?
2) How do I capture at lower frame rates and still play
the encoded video at normal speed?
3) How do I set the format(and which format--depends on the codec?)
and width and height info on the frames I write?
The code I am using in its entirety is shown below
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
//SDL
#include "SDL.h"
#include "SDL_thread.h"
}
//Output YUV420P
#define OUTPUT_YUV420P 0
//'1' Use Dshow
//'0' Use GDIgrab
#define USE_DSHOW 0
int main(int argc, char* argv[])
{
//1.WE HAVE THE FORMAT CONTEXT
//THIS IS FROM THE DESKTOP GRAB STREAM.
AVFormatContext *pFormatCtx;
int i, videoindex;
AVCodecContext *pCodecCtx;
AVCodec *pCodec;
av_register_all();
avformat_network_init();
//ASSIGN STH TO THE FORMAT CONTEXT.
pFormatCtx = avformat_alloc_context();
//Register Device
avdevice_register_all();
//Windows
#ifdef _WIN32
#if USE_DSHOW
//Use dshow
//
//Need to Install screen-capture-recorder
//screen-capture-recorder
//Website: http://sourceforge.net/projects/screencapturer/
//
AVInputFormat *ifmt=av_find_input_format("dshow");
//if(avformat_open_input(&pFormatCtx,"video=screen-capture-recorder",ifmt,NULL)!=0){
if(avformat_open_input(&pFormatCtx,"video=UScreenCapture",ifmt,NULL)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
#else
//Use gdigrab
AVDictionary* options = NULL;
//Set some options
//grabbing frame rate
av_dict_set(&options,"framerate","30",0);
//The distance from the left edge of the screen or desktop
//av_dict_set(&options,"offset_x","20",0);
//The distance from the top edge of the screen or desktop
//av_dict_set(&options,"offset_y","40",0);
//Video frame size. The default is to capture the full screen
//av_dict_set(&options,"video_size","640x480",0);
AVInputFormat *ifmt=av_find_input_format("gdigrab");
if(avformat_open_input(&pFormatCtx,"desktop",ifmt,&options)!=0){
printf("Couldn't open input stream.\n");
return -1;
}
#endif
#endif//FOR THE WIN32 THING.
if(avformat_find_stream_info(pFormatCtx,NULL)<0)
{
printf("Couldn't find stream information.\n");
return -1;
}
videoindex=-1;
for(i=0; i<pFormatCtx->nb_streams; i++)
if(pFormatCtx->streams[i]->codec->codec_type
==AVMEDIA_TYPE_VIDEO)
{
videoindex=i;
break;
}
if(videoindex==-1)
{
printf("Didn't find a video stream.\n");
return -1;
}
pCodecCtx=pFormatCtx->streams[videoindex]->codec;
pCodec=avcodec_find_decoder(pCodecCtx->codec_id);
if(pCodec==NULL)
{
printf("Codec not found.\n");
return -1;
}
if(avcodec_open2(pCodecCtx, pCodec,NULL)<0)
{
printf("Could not open codec.\n");
return -1;
}
//THIS IS WHERE YOU CONTROL THE FORMAT(THROUGH FRAMES).
AVFrame *pFrame;
pFrame=av_frame_alloc();
int ret, got_picture;
AVPacket *packet=(AVPacket *)av_malloc(sizeof(AVPacket));
//TRY TO INIT THE PACKET HERE
av_init_packet(packet);
//Output Information-----------------------------
printf("File Information---------------------\n");
av_dump_format(pFormatCtx,0,NULL,0);
printf("-------------------------------------------------\n");
//<<--FOR WRITING MPG FILES
//<<--START:PREPARE TO WRITE YOUR MPG FILE.
const char * filename="test.mpg";
int codec_id= AV_CODEC_ID_MPEG1VIDEO;
AVCodec *codec11;
AVCodecContext *outContext= NULL;
int got_output;
FILE *f;
AVPacket pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
printf("Encode video file %s\n", filename);
/* find the mpeg1 video encoder */
codec11 = avcodec_find_encoder((AVCodecID)codec_id);
if (!codec11) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
outContext = avcodec_alloc_context3(codec11);
if (!outContext) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* put sample parameters */
outContext->bit_rate = 400000;
/* resolution must be a multiple of two */
outContext->width=pCodecCtx->width;
outContext->height=pCodecCtx->height;
/* frames per second */
outContext->time_base.num=1;
outContext->time_base.den=25;
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
outContext->gop_size = 10;
outContext->max_b_frames = 1;
outContext->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(outContext->priv_data, "preset", "slow", 0);
/* open it */
if (avcodec_open2(outContext, codec11, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
AVFrame *outframe = av_frame_alloc();
int nbytes = avpicture_get_size(outContext->pix_fmt,
outContext->width,
outContext->height);
uint8_t* outbuffer = (uint8_t*)av_malloc(nbytes);
//ASSOCIATE THE FRAME TO THE ALLOCATED BUFFER.
avpicture_fill((AVPicture*)outframe, outbuffer,
AV_PIX_FMT_YUV420P,
outContext->width, outContext->height);
SwsContext* swsCtx_ ;
swsCtx_= sws_getContext(pCodecCtx->width,
pCodecCtx->height,
pCodecCtx->pix_fmt,
outContext->width, outContext->height,
outContext->pix_fmt,
SWS_BICUBIC, NULL, NULL, NULL);
//HERE WE START PULLING PACKETS FROM THE SPECIFIED FORMAT CONTEXT.
while(av_read_frame(pFormatCtx, packet)>=0)
{
if(packet->stream_index==videoindex)
{
ret= avcodec_decode_video2(pCodecCtx,
pFrame,
&got_picture,packet );
if(ret < 0)
{
printf("Decode Error.\n");
return -1;
}
if(got_picture)
{
sws_scale(swsCtx_, pFrame->data, pFrame->linesize,
0, pCodecCtx->height, outframe->data,
outframe->linesize);
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
ret = avcodec_encode_video2(outContext, &pkt, outframe, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
}
av_free_packet(packet);
}//THE LOOP TO PULL PACKETS FROM THE FORMAT CONTEXT ENDS HERE.
//
/* get the delayed frames */
for (got_output = 1; got_output; i++) {
//fflush(stdout);
ret = avcodec_encode_video2(outContext, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
/* add sequence end code to have a real mpeg file */
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_close(outContext);
av_free(outContext);
//av_freep(&frame->data[0]);
//av_frame_free(&frame);
//THIS WAS ADDED LATER
av_free(outbuffer);
avcodec_close(pCodecCtx);
avformat_close_input(&pFormatCtx);
return 0;
}
Thank you for your time.
It is possible to record and play in the normal speed.
AVDictionary* options = NULL;
av_dict_set( &options, "preset", "veryslow", 0 );
following presets are available :
{
"ultrafast",
"superfast",
"veryfast",
"faster",
"fast",
"medium",
"slow",
"slower",
"veryslow",
"placebo", 0
}
set the suitable preset.

Decoding then re-encoding frames in reverse order with ffmpeg

I am pretty new to ffmpeg, and now I want to decode the frames and encode it back in reverse order. I have used this tutorial for decoding the video and this example to encode it back. I can decode the frames properly, but when I re-encode it, I get a dummy image rather than clear video. How can I get the actual reversed video?
static void video_encode_example(const char *filename, int codec_id)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int i, ret, x, y, got_output;
FILE *f;
struct SwsContext *sws_ctx = NULL;
AVFrame *frame;
AVFrame *frameRGB = NULL;
AVPacket pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
printf("Encode video file %s\n", filename);
/* find the mpeg1 video encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 400000;
/* resolution must be a multiple of two */
c->width = 352;
c->height = 288;
/* frames per second */
c->time_base = (AVRational){1,25};
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
c->gop_size = 10;
c->max_b_frames = 1;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* the image can be allocated by any means and av_image_alloc() is
* just the most convenient way if av_malloc() is to be used */
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height,
c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
/* encode 1 second of video */
for (i = 0; i < 25; i++) {
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 0;
fflush(stdout);
/* prepare a dummy image */
/* Y */
for (y = 0; y < c->height; y++) {
for (x = 0; x < c->width; x++) {
frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
}
}
/* Cb and Cr */
for (y = 0; y < c->height/2; y++) {
for (x = 0; x < c->width/2; x++) {
frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
}
}
frame->pts = i;
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
// Convert the image from its native format to RGB
sws_scale
(
sws_ctx,
(uint8_t const * const *)frame->data,
frame->linesize,
0,
c->height,
frameRGB->data,
frameRGB->linesize
);
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
/* get the delayed frames */
for (got_output = 1; got_output; i++) {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
av_free_packet(&pkt);
}
}
/* add sequence end code to have a real mpeg file */
fwrite(endcode, 1, sizeof(endcode), f);
fclose(f);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
printf("\n");
}
Assuming that your code sample have nothing to do with a video input nor a reverse encoding, but is only an "encode sample".
A video file is made of severa parts.
First, you have a container format (for example mkv, or avi). It contains various informations on which streams are in this file (subtiles? how many video? multiple audio stream? stuff like that).
Then you have the data streams.
A video stream inside this file is a list of packets, encoded by a specific codec (h264 or mpeg4 for example) from frames.
In your code sample, you have a list of frames, and your are encoding them with a codec, giving you a list of packets (I suppose it works, since it is a tutorial code).
But these packets are dumped into a file, without container. So if a video player wants to read them, I will have a hard time to guess what is the format.
It is stated as the first line in your link
Instead of dumping your packets in a file directly, you want to use FFmpeg to embed your packets in a container, and save your container.
You have an example of all this process here
Last thing: if you want to have your video in reverse order, you must encode frames in the reverse order, not only mux packets in reverse order.

How does one encode a series of images into H264 using the x264 C API?

How does one use the x264 C API to encode RBG images into H264 frames? I already created a sequence of RBG images, how can I now transform that sequence into a sequence of H264 frames? In particular, how do I encode this sequence of RGB images into a sequence of H264 frame consisting of a single initial H264 keyframe followed by dependent H264 frames?
First of all: check the x264.h file, it contains more or less the reference for each function and structure. The x264.c file you can find in the download contains a sample implementation. Most people say to base yourself on that one, but I find it rather complex for beginners, it is good as an example to fall back on however.
First you set up some parameters, of the type x264_param_t, a good site describing parameters is http://mewiki.project357.com/wiki/X264_Settings . Also take a look at the x264_param_default_preset function which allows you to target some functionality without needing to understand all of the (sometimes quite complex) parameters. Also use x264_param_apply_profile afterwards (you'll probably want the "baseline" profile)
This is some example setup from my code:
x264_param_t param;
x264_param_default_preset(&param, "veryfast", "zerolatency");
param.i_threads = 1;
param.i_width = width;
param.i_height = height;
param.i_fps_num = fps;
param.i_fps_den = 1;
// Intra refres:
param.i_keyint_max = fps;
param.b_intra_refresh = 1;
//Rate control:
param.rc.i_rc_method = X264_RC_CRF;
param.rc.f_rf_constant = 25;
param.rc.f_rf_constant_max = 35;
//For streaming:
param.b_repeat_headers = 1;
param.b_annexb = 1;
x264_param_apply_profile(&param, "baseline");
After this you can initialize the encoder as follows
x264_t* encoder = x264_encoder_open(&param);
x264_picture_t pic_in, pic_out;
x264_picture_alloc(&pic_in, X264_CSP_I420, w, h)
X264 expects YUV420P data (I guess some others also, but that's the common one). You can use libswscale (from ffmpeg) to convert images to the right format. Initializing this is like this (i assume RGB data with 24bpp).
struct SwsContext* convertCtx = sws_getContext(in_w, in_h, PIX_FMT_RGB24, out_w, out_h, PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);
encoding is as simple as this then, for each frame do:
//data is a pointer to you RGB structure
int srcstride = w*3; //RGB stride is just 3*width
sws_scale(convertCtx, &data, &srcstride, 0, h, pic_in.img.plane, pic_in.img.stride);
x264_nal_t* nals;
int i_nals;
int frame_size = x264_encoder_encode(encoder, &nals, &i_nals, &pic_in, &pic_out);
if (frame_size >= 0)
{
// OK
}
I hope this will get you going ;), I spent a long time on it myself to get started. X264 is an insanely strong but sometimes complex piece of software.
edit: When you use other parameters there will be delayed frames, this is not the case with my parameters (mostly due to the nolatency option). If this is the case, frame_size will sometimes be zero and you'll have to call x264_encoder_encode as long as the function x264_encoder_delayed_frames does not return 0. But for this functionality you should take a deeper peek into x264.c and x264.h .
I've uploaded an example which generates raw yuv frames and then encodes them using x264. Full code can be found here: https://gist.github.com/roxlu/6453908
FFmpeg 2.8.6 C runnable example
Using FFpmeg as a wrapper for x264 is a good idea, as it exposes an uniform API for multiple encoders. So if you ever need to change formats, you can change just one parameter instead of learning a new API.
The example synthesizes and encodes some colorful frames generated by generate_rgb.
Control of frame type (I, P, B) to have as few key-frames as possible (ideally just the first) is discussed here: https://stackoverflow.com/a/36412909/895245 As mentioned there, I do not recommend it for most applications.
The key-lines that do frame type control here are:
/* Minimal distance of I-frames. This is the maximum value allowed,
or else we get a warning at runtime. */
c->keyint_min = 600;
and:
if (frame->pts == 1) {
frame->key_frame = 1;
frame->pict_type = AV_PICTURE_TYPE_I;
} else {
frame->key_frame = 0;
frame->pict_type = AV_PICTURE_TYPE_P;
}
We can then verify the frame type with:
ffprobe -select_streams v \
-show_frames \
-show_entries frame=pict_type \
-of csv \
tmp.h264
as mentioned at: https://superuser.com/questions/885452/extracting-the-index-of-key-frames-from-a-video-using-ffmpeg
Preview of generated output.
main.c
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
static AVCodecContext *c = NULL;
static AVFrame *frame;
static AVPacket pkt;
static FILE *file;
struct SwsContext *sws_context = NULL;
static void ffmpeg_encoder_set_frame_yuv_from_rgb(uint8_t *rgb) {
const int in_linesize[1] = { 3 * c->width };
sws_context = sws_getCachedContext(sws_context,
c->width, c->height, AV_PIX_FMT_RGB24,
c->width, c->height, AV_PIX_FMT_YUV420P,
0, 0, 0, 0);
sws_scale(sws_context, (const uint8_t * const *)&rgb, in_linesize, 0,
c->height, frame->data, frame->linesize);
}
uint8_t* generate_rgb(int width, int height, int pts, uint8_t *rgb) {
int x, y, cur;
rgb = realloc(rgb, 3 * sizeof(uint8_t) * height * width);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
cur = 3 * (y * width + x);
rgb[cur + 0] = 0;
rgb[cur + 1] = 0;
rgb[cur + 2] = 0;
if ((frame->pts / 25) % 2 == 0) {
if (y < height / 2) {
if (x < width / 2) {
/* Black. */
} else {
rgb[cur + 0] = 255;
}
} else {
if (x < width / 2) {
rgb[cur + 1] = 255;
} else {
rgb[cur + 2] = 255;
}
}
} else {
if (y < height / 2) {
rgb[cur + 0] = 255;
if (x < width / 2) {
rgb[cur + 1] = 255;
} else {
rgb[cur + 2] = 255;
}
} else {
if (x < width / 2) {
rgb[cur + 1] = 255;
rgb[cur + 2] = 255;
} else {
rgb[cur + 0] = 255;
rgb[cur + 1] = 255;
rgb[cur + 2] = 255;
}
}
}
}
}
return rgb;
}
/* Allocate resources and write header data to the output file. */
void ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height) {
AVCodec *codec;
int ret;
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
c->bit_rate = 400000;
c->width = width;
c->height = height;
c->time_base.num = 1;
c->time_base.den = fps;
c->keyint_min = 600;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
file = fopen(filename, "wb");
if (!file) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height, c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
}
/*
Write trailing data to the output file
and free resources allocated by ffmpeg_encoder_start.
*/
void ffmpeg_encoder_finish(void) {
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
int got_output, ret;
do {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
} while (got_output);
fwrite(endcode, 1, sizeof(endcode), file);
fclose(file);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
}
/*
Encode one frame from an RGB24 input and save it to the output file.
Must be called after ffmpeg_encoder_start, and ffmpeg_encoder_finish
must be called after the last call to this function.
*/
void ffmpeg_encoder_encode_frame(uint8_t *rgb) {
int ret, got_output;
ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
if (frame->pts == 1) {
frame->key_frame = 1;
frame->pict_type = AV_PICTURE_TYPE_I;
} else {
frame->key_frame = 0;
frame->pict_type = AV_PICTURE_TYPE_P;
}
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
}
/* Represents the main loop of an application which generates one frame per loop. */
static void encode_example(const char *filename, int codec_id) {
int pts;
int width = 320;
int height = 240;
uint8_t *rgb = NULL;
ffmpeg_encoder_start(filename, codec_id, 25, width, height);
for (pts = 0; pts < 100; pts++) {
frame->pts = pts;
rgb = generate_rgb(width, height, pts, rgb);
ffmpeg_encoder_encode_frame(rgb);
}
ffmpeg_encoder_finish();
}
int main(void) {
avcodec_register_all();
encode_example("tmp.h264", AV_CODEC_ID_H264);
encode_example("tmp.mpg", AV_CODEC_ID_MPEG1VIDEO);
return 0;
}
Compile and run with:
gcc -o main.out -std=c99 -Wextra main.c -lavcodec -lswscale -lavutil
./main.out
ffplay tmp.mpg
ffplay tmp.h264
Tested on Ubuntu 16.04. GitHub upstream.

Resources