FFmpeg C API - syncing video and audio - c

I am trimming video and having a hard getting the audio to sync correctly. The code below is as close as I've gotten it work. I've tried both re-encoding and not re-encoding the output streams.
The video trims correctly and is written to the output container. The audio stream also trims correctly, but is written to the front of the output container. For example if the trim length is 10s - the correct portion of audio plays for 10s and then the correct portion of video plays.
//////// audio stream ////////
const AVStream *input_stream_audio = input_container->streams[audio_stream_index];
const AVCodec *decoder_audio = avcodec_find_decoder(input_stream_audio->codec->codec_id);
if(!decoder_audio) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Audio decoder not found");
return -1;
}
if(avcodec_open2(input_stream_audio->codec, decoder_audio, NULL) < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error opening audio decoder");
return -1;
}
AVStream *output_stream_audio = avformat_new_stream(output_container, NULL);
if(avcodec_copy_context(output_stream_audio->codec, input_stream_audio->codec) != 0){
LOGE("=> Failed to Copy audio Context ");
return -1;
}
else {
LOGI("=> Copied audio context ");
output_stream_audio->codec->codec_id = input_stream_audio->codec->codec_id;
output_stream_audio->codec->codec_tag = 0;
output_stream_audio->pts = input_stream_audio->pts;
output_stream_audio->time_base.num = input_stream_audio->time_base.num;
output_stream_audio->time_base.den = input_stream_audio->time_base.den;
}
if(avio_open(&output_container->pb, output_file, AVIO_FLAG_WRITE) < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error opening output file");
return -1;
}
// allocate frame for conversion
decoded_frame = avcodec_alloc_frame();
if(!decoded_frame) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error allocating frame");
return -1;
}
av_dump_format(input_container, 0, input_file, 0);
avformat_write_header(output_container, NULL);
av_init_packet(&decoded_packet);
decoded_packet.data = NULL;
decoded_packet.size = 0;
int current_frame_num = 1;
int current_frame_num_audio = 1;
int got_frame, len;
AVRational default_timebase;
default_timebase.num = 1;
default_timebase.den = AV_TIME_BASE;
int64_t starttime_int64 = av_rescale_q((int64_t)( 12.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream->time_base);
int64_t endtime_int64 = av_rescale_q((int64_t)( 18.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream->time_base);
LOGI("=> starttime_int64: %" PRId64, starttime_int64);
LOGI("=> endtime_int64: %" PRId64, endtime_int64);
int64_t starttime_int64_audio = av_rescale_q((int64_t)( 12.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream_audio->time_base);
int64_t endtime_int64_audio = av_rescale_q((int64_t)( 18.0 * AV_TIME_BASE ), AV_TIME_BASE_Q, input_stream_audio->time_base);
LOGI("=> starttime_int64_audio: %" PRId64, starttime_int64_audio);
LOGI("=> endtime_int64_audio: %" PRId64, endtime_int64_audio);
// loop input container and decode frames
while(av_read_frame(input_container, &decoded_packet)>=0) {
// video packets
if (decoded_packet.stream_index == video_stream_index) {
len = avcodec_decode_video2(input_stream->codec, decoded_frame, &got_frame, &decoded_packet);
if(len < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> No frames to decode");
return -1;
}
// this is the trim range we're looking for
if(got_frame && decoded_frame->pkt_pts >= starttime_int64 && decoded_frame->pkt_pts <= endtime_int64) {
av_init_packet(&encoded_packet);
encoded_packet.data = NULL;
encoded_packet.size = 0;
ret = avcodec_encode_video2(output_stream->codec, &encoded_packet, decoded_frame, &got_frame);
if (ret < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error encoding frames");
return ret;
}
if(got_frame) {
if (output_stream->codec->coded_frame->key_frame) {
encoded_packet.flags |= AV_PKT_FLAG_KEY;
}
encoded_packet.stream_index = output_stream->index;
encoded_packet.pts = av_rescale_q(current_frame_num, output_stream->codec->time_base, output_stream->time_base);
encoded_packet.dts = av_rescale_q(current_frame_num, output_stream->codec->time_base, output_stream->time_base);
ret = av_interleaved_write_frame(output_container, &encoded_packet);
if (ret < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error encoding frames");
return ret;
}
else {
current_frame_num +=1;
}
}
av_free_packet(&encoded_packet);
}
}
// audio packets
else if(decoded_packet.stream_index == audio_stream_index) {
// this is the trim range we're looking for
if(decoded_packet.pts >= starttime_int64_audio && decoded_packet.pts <= endtime_int64_audio) {
av_init_packet(&encoded_packet);
encoded_packet.data = decoded_packet.data;
encoded_packet.size = decoded_packet.size;
encoded_packet.stream_index = audio_stream_index;
encoded_packet.pts = av_rescale_q(current_frame_num_audio, output_stream_audio->codec->time_base, output_stream_audio->time_base);
encoded_packet.dts = av_rescale_q(current_frame_num_audio, output_stream_audio->codec->time_base, output_stream_audio->time_base);
ret = av_interleaved_write_frame(output_container, &encoded_packet);
if (ret < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error encoding frames");
return ret;
}
else {
current_frame_num_audio +=1;
}
av_free_packet(&encoded_packet);
}
}
}
Edit
I have slight improvement on the initial code. The audio and video are still not perfectly synced, but the original problem of the audio playing first followed by the video is resolved.
I'm now writing the decoded packet to the output container rather than re-encoding it.
In the end though I have the same problem - the trimmed video's audio and video streams are not perfectly synced.
// audio packets
else if(decoded_packet.stream_index == audio_stream_index) {
// this is the trim range we're looking for
if(decoded_packet.pts >= starttime_int64_audio && decoded_packet.pts <= endtime_int64_audio) {
ret = av_interleaved_write_frame(output_container, &decoded_packet);
if (ret < 0) {
cleanup(decoded_packet, output_container, decoded_frame);
avformat_close_input(&input_container);
LOGE("=> Error writing audio frame (%s)", av_err2str(ret));
return ret;
}
else {
current_frame_num_audio +=1;
}
}
else if(decoded_frame->pkt_pts > endtime_int64_audio) {
audio_copy_complete = true;
}
}

I believe you should be able to make this work if you use correctly set up both codec context and stream time bases, and then after encode_video2 and encode_audio2 you call av_packet_rescale_tb with such timebases.

Related

Reading motion vectors of every 60th frame in video file

I have the following code which reads a video files and extracts motion vectors from each frame and outputs them to stdout:
#include <libavutil/motion_vector.h>
#include <libavformat/avformat.h>
static AVFormatContext *fmt_ctx = NULL;
static AVCodecContext *video_dec_ctx = NULL;
static AVStream *video_stream = NULL;
static const char *src_filename = NULL;
static int video_stream_idx = -1;
static AVFrame *frame = NULL;
static int video_frame_count = 0;
static int decode_packet(const AVPacket *pkt)
{
//fprintf(stderr, "%d\n", video_dec_ctx->width);
int ret = avcodec_send_packet(video_dec_ctx, pkt);
if (ret < 0) {
fprintf(stderr, "Error while sending a packet to the decoder: %s\n", av_err2str(ret));
return ret;
}
while (ret >= 0) {
ret = avcodec_receive_frame(video_dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
} else if (ret < 0) {
fprintf(stderr, "Error while receiving a frame from the decoder: %s\n", av_err2str(ret));
return ret;
}
if (ret >= 0) {
int i;
AVFrameSideData *sd;
video_frame_count++;
sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MOTION_VECTORS);
if (sd) {
const AVMotionVector *mvs = (const AVMotionVector *)sd->data;
for (i = 0; i < sd->size / sizeof(*mvs); i++) {
const AVMotionVector *mv = &mvs[i];
printf("%d,%4d,%4d,%4d\n",
video_frame_count,
abs(mv->src_x - mv->dst_x),
abs(mv->src_y - mv->dst_y));
}
}
av_frame_unref(frame);
}
}
return 0;
}
static int open_codec_context(AVFormatContext *fmt_ctx, enum AVMediaType type)
{
int ret;
AVStream *st;
AVCodecContext *dec_ctx = NULL;
AVCodec *dec = NULL;
AVDictionary *opts = NULL;
ret = av_find_best_stream(fmt_ctx, type, -1, -1, &dec, 0);
if (ret < 0) {
fprintf(stderr, "Could not find %s stream in input file '%s'\n",
av_get_media_type_string(type), src_filename);
return ret;
} else {
int stream_idx = ret;
st = fmt_ctx->streams[stream_idx];
dec_ctx = avcodec_alloc_context3(dec);
if (!dec_ctx) {
fprintf(stderr, "Failed to allocate codec\n");
return AVERROR(EINVAL);
}
ret = avcodec_parameters_to_context(dec_ctx, st->codecpar);
if (ret < 0) {
fprintf(stderr, "Failed to copy codec parameters to codec context\n");
return ret;
}
/* Init the video decoder */
av_dict_set(&opts, "flags2", "+export_mvs", 0);
if ((ret = avcodec_open2(dec_ctx, dec, &opts)) < 0) {
fprintf(stderr, "Failed to open %s codec\n",
av_get_media_type_string(type));
return ret;
}
video_stream_idx = stream_idx;
video_stream = fmt_ctx->streams[video_stream_idx];
video_dec_ctx = dec_ctx;
}
return 0;
}
int main(int argc, char **argv)
{
int ret = 0;
AVPacket pkt = { 0 };
if (argc != 2) {
fprintf(stderr, "Usage: %s <video>\n", argv[0]);
exit(1);
}
src_filename = argv[1];
if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0) {
fprintf(stderr, "Could not open source file %s\n", src_filename);
exit(1);
}
if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
fprintf(stderr, "Could not find stream information\n");
exit(1);
}
open_codec_context(fmt_ctx, AVMEDIA_TYPE_VIDEO);
av_dump_format(fmt_ctx, 0, src_filename, 0);
if (!video_stream) {
fprintf(stderr, "Could not find video stream in the input, aborting\n");
ret = 1;
goto end;
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate frame\n");
ret = AVERROR(ENOMEM);
goto end;
}
printf("framenum,dx,dy\n");
/* read frames from the file */
while (av_read_frame(fmt_ctx, &pkt) >= 0) {
if (pkt.stream_index == video_stream_idx)
ret = decode_packet(&pkt);
av_packet_unref(&pkt);
if (ret < 0)
break;
}
/* flush cached frames */
decode_packet(NULL);
end:
avcodec_free_context(&video_dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
return ret < 0;
}
Instead and outputting every frame, I only want to read one frame every 60 frames. The speed of the program is very important so I want to eliminate as much unnecessary code as possible. The problem is I'm having a hard time understanding how to advance the video stream without reading in new frames. I call av_read_frame(fmt_ctx, &pkt) to get packet info that is decoded by decode_packet and then goes on to read the next, but I can't find a way to only get packet info for one frame out of each 60.
Use av_seek_frame() to read every 60th frame.

libavcodec transcoding - setting pts and dts

I'm trying to transcode video files into a standard format. This is the main loop I have:
for(frame_count = 0; av_read_frame(input_ctx, &in_packet) >= 0; frame_count++) {
if(in_packet.stream_index == video_stream_index) {
decodedPacket = 0;
rc = avcodec_decode_video2(video_in_codec, inputFrame, &decodedPacket, &in_packet);
if(decodedPacket) {
out_frames++;
rc = sws_scale(sws_ctx, inputFrame->data, inputFrame->linesize, 0, video_out_codec->height, outputFrame->data, outputFrame->linesize);
if(rc != video_out_codec->height) {
puts("scaling error");
}
outputFrame->pts = (1.0 / 30.0) * 90.0 * video_out_codec->frame_number;
rc = avcodec_encode_video2(video_out_codec, &out_packet, outputFrame, &encodedPacket);
if(rc != 0) {
puts("encoding error");
}
if(encodedPacket) {
if (video_out_stream->codec->coded_frame->key_frame) { // deprecated, what to use instead
out_packet.flags |= AV_PKT_FLAG_KEY;
}
out_packet.stream_index = 0;
rc = av_interleaved_write_frame(output_ctx, &out_packet);
if(rc != 0) {
puts("frame write error");
}
av_free_packet(&out_packet);
memset(&out_packet, 0, sizeof(AVPacket));
packet_count++;
}
av_free_packet(&in_packet);
}
}
...
When I run this I get strange values for tbr/tbn/tbc reported by ffprobe and the video is a mess.
I've tried adding code as per this answer e.g.
if (out_packet.pts != AV_NOPTS_VALUE) {
out_packet.pts = av_rescale_q(out_packet.pts, video_out_stream->codec->time_base, video_out_stream->time_base);
}
if (out_packet.pts != AV_NOPTS_VALUE) {
out_packet.dts = av_rescale_q(out_packet.dts, video_out_stream->codec->time_base, video_out_stream->time_base);
}
...but then I get errors such as this is the debug output:
[mp4 # 0x1038aba00] Delay between the first packet and last packet in the muxing queue is 10100000 > 10000000: forcing output
I'm setting the timebase correctly (I think)...
video_out_stream = avformat_new_stream(output_ctx, videoEncoder);
video_out_stream->id = 0;
video_out_stream->time_base.den = 30;
video_out_stream->time_base.num = 1;
video_out_codec = video_out_stream->codec;
avcodec_get_context_defaults3(video_out_codec, videoEncoder);
video_out_codec->codec_id = AV_CODEC_ID_H264;
video_out_codec->bit_rate = 2048;
video_out_codec->width = 854;
video_out_codec->height = 480;
video_out_codec->time_base.den = 30;
video_out_codec->time_base.num = 1;
video_out_codec->gop_size = 30;
video_out_codec->pix_fmt = AV_PIX_FMT_YUV420P;
Any thoughts about how I can calculate the correct output frame pts and encoded packet dts/pts? I'm clearly doing something wrong.

Not able to decode mp4 file using latest ffmpeg library : av_decode_video2

I am writing a wrapper code around latest ffmpeg library. I am supplying MP4 files from local system. My problem is that I am unable to get any decoded frames when I use av_decode_video2(). The return value comes out to be negative. I have used av_read_frame() which returns 0. I googled about the problem I am facing but no where could I find the correct explanation. Please give me insight here. Pasting the pseudo code here.
av_init_packet(avpkt);
picture=av_frame_alloc();
pFrameRGB=av_frame_alloc();
codec = avcodec_find_decoder(CODEC_ID_H264);
c= avcodec_alloc_context3(codec)
avcodec_open2(decoderLibraryData->c, decoderLibraryData->codec, NULL)
FormatContext = avformat_alloc_context();
char *pUrl ="./1.MP4";
iRet = avformat_open_input(atContext, pUrl, pFmt, NULL);
if(FormatContext == NULL)
{
printf("could not assign any memory !!!!!!!!! \n");
}
avformat_find_stream_info(FormatContext, NULL);
while(av_read_frame(FormatContext,avpkt) >= 0)
{
len = avcodec_decode_video2(c, picture, &got_picture,avpkt);
printf("CODEC MANAGER len %d Frame decompressed %d \n",len,got_picture);
if (len <= 0)
{
return ERROR;
}
}
}
if(lastHeight != 0 && lastWidth != 0)
{
if(lastWidth != c->width || lastHeight != c->height )
{
av_free(buffer);
buffer = NULL;
lastWidth = c->width;
lastHeight = c->height;
}
}
else
{
lastWidth = c->width;
lastHeight = c->height;
}
decodeFlag = 1;
if(!buffer)
{
int numBytes;
v_mutex_lock(globalCodecLock);
switch(inPixFormat)
{
case RGB:
// Determine required buffer size and allocate buffer
numBytes=avpicture_get_size(PIX_FMT_RGB24, c->width, c->height);
buffer=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
avpicture_fill((AVPicture *)pFrameRGB,buffer,PIX_FMT_RGB24,c->width, c->height);
if(cntxt)
sws_freeContext(cntxt);
cntxt = sws_getContext(c->width, c->height, c->pix_fmt,
c->width, c->height, PIX_FMT_RGB24, SWS_BICUBIC, NULL, NULL, NULL);
break;
}
v_mutex_unlock(globalCodecLock);
if(cntxt == NULL)
{
printf("sws_getContext error\n");
return ERROR;
}
}
{
sws_scale(cntxt, picture->data, picture->linesize, 0, c->height, pFrameRGB->data, pFrameRGB->linesize);
if(rgbBuff)
{
if(c->width <= *width && c->height <= *height)
{
saveFrame(pFrameRGB, c->width, c->height, rgbBuff,inPixFormat);
*width = c->width;
*height = c->height;
rs = SUCCESS;
break;
}
else
{
rs = VA_LOWBUFFERSIZE;
}
}
else
{
rs = VA_LOWBUFFERSIZE;
}
}
if(width)
{
*width = c->width;
}
if(height)
{
*height = c->height;
}
if(rs == VA_LOWBUFFERSIZE)
{
break;
}
I am getting the return value of av_read_frame as 0 but av_decode_video2 returns value in negative. I am not able to get any clue here.
Make sure you have called
av_register_all();
or
avcodec_register_all();
at the beginning of your app.
Also it seems the problem is from calling avformat_find_stream_info. Test with the following code:
AVPacket avpkt;
av_init_packet(&avpkt);
AVFrame* picture = av_frame_alloc();
AVFrame* pFrameRGB = av_frame_alloc();
AVFormatContext* c2 = avformat_alloc_context();
char *pUrl = "C:/Sample Videos/20-06-34.MP4";
int video_stream_index = 0;
AVInputFormat* pFmt;
int iRet = avformat_open_input(&c2, pUrl, pFmt, NULL);
AVStream* stream = c2->streams[video_stream_index];
AVCodec* codec = avcodec_find_decoder(stream->codec->codec_id);
avcodec_open2(stream->codec, codec, NULL);
if (c2 == NULL)
{
printf("could not assign any memory !!!!!!!!! \n");
}
while (av_read_frame(c2, &avpkt) >= 0)
{
int got_picture;
int len = avcodec_decode_video2(stream->codec, picture, &got_picture, &avpkt);
printf("CODEC MANAGER len %d Frame decompressed %d \n", len, got_picture);
if (len <= 0)
{
return ERROR;
}
}

Cropping Square Video using FFmpeg

Updated
So I am trying to decode a mp4 file, crop the video into a square, and then re encode it back out to another mp4 file. This is my current code but there are a few issues with it.
One is that the video doesn't keep its rotation after the video has been re encoded
Second is that the frames get outputted in a very fast video file that is not the same length as the original
Third is that there is no sound
Lastly and most importantly is do I need AVFilter to do the frame cropping or can it just be done per frame as a resize of the frame and then encoded back out.
const char *inputPath = "test.mp4";
const char *outPath = "cropped.mp4";
const char *outFileType = "mp4";
static AVFrame *oframe = NULL;
static AVFilterGraph *filterGraph = NULL;
static AVFilterContext *crop_ctx = NULL;
static AVFilterContext *buffersink_ctx = NULL;
static AVFilterContext *buffer_ctx = NULL;
int err;
int crop_video(int width, int height) {
av_register_all();
avcodec_register_all();
avfilter_register_all();
AVFormatContext *inCtx = NULL;
// open input file
err = avformat_open_input(&inCtx, inputPath, NULL, NULL);
if (err < 0) {
printf("error at open input in\n");
return err;
}
// get input file stream info
err = avformat_find_stream_info(inCtx, NULL);
if (err < 0) {
printf("error at find stream info\n");
return err;
}
// get info about video
av_dump_format(inCtx, 0, inputPath, 0);
// find video input stream
int vs = -1;
int s;
for (s = 0; s < inCtx->nb_streams; ++s) {
if (inCtx->streams[s] && inCtx->streams[s]->codec && inCtx->streams[s]->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
vs = s;
break;
}
}
// check if video stream is valid
if (vs == -1) {
printf("error at open video stream\n");
return -1;
}
// set output format
AVOutputFormat * outFmt = av_guess_format(outFileType, NULL, NULL);
if (!outFmt) {
printf("error at output format\n");
return -1;
}
// get an output context to write to
AVFormatContext *outCtx = NULL;
err = avformat_alloc_output_context2(&outCtx, outFmt, NULL, NULL);
if (err < 0 || !outCtx) {
printf("error at output context\n");
return err;
}
// input and output stream
AVStream *outStrm = avformat_new_stream(outCtx, NULL);
AVStream *inStrm = inCtx->streams[vs];
// add a new codec for the output stream
AVCodec *codec = NULL;
avcodec_get_context_defaults3(outStrm->codec, codec);
outStrm->codec->thread_count = 1;
outStrm->codec->coder_type = AVMEDIA_TYPE_VIDEO;
if(outCtx->oformat->flags & AVFMT_GLOBALHEADER) {
outStrm->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
outStrm->codec->sample_aspect_ratio = outStrm->sample_aspect_ratio = inStrm->sample_aspect_ratio;
err = avio_open(&outCtx->pb, outPath, AVIO_FLAG_WRITE);
if (err < 0) {
printf("error at opening outpath\n");
return err;
}
outStrm->disposition = inStrm->disposition;
outStrm->codec->bits_per_raw_sample = inStrm->codec->bits_per_raw_sample;
outStrm->codec->chroma_sample_location = inStrm->codec->chroma_sample_location;
outStrm->codec->codec_id = inStrm->codec->codec_id;
outStrm->codec->codec_type = inStrm->codec->codec_type;
if (!outStrm->codec->codec_tag) {
if (! outCtx->oformat->codec_tag
|| av_codec_get_id (outCtx->oformat->codec_tag, inStrm->codec->codec_tag) == outStrm->codec->codec_id
|| av_codec_get_tag(outCtx->oformat->codec_tag, inStrm->codec->codec_id) <= 0) {
outStrm->codec->codec_tag = inStrm->codec->codec_tag;
}
}
outStrm->codec->bit_rate = inStrm->codec->bit_rate;
outStrm->codec->rc_max_rate = inStrm->codec->rc_max_rate;
outStrm->codec->rc_buffer_size = inStrm->codec->rc_buffer_size;
const size_t extra_size_alloc = (inStrm->codec->extradata_size > 0) ?
(inStrm->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE) :
0;
if (extra_size_alloc) {
outStrm->codec->extradata = (uint8_t*)av_mallocz(extra_size_alloc);
memcpy( outStrm->codec->extradata, inStrm->codec->extradata, inStrm->codec->extradata_size);
}
outStrm->codec->extradata_size = inStrm->codec->extradata_size;
AVRational input_time_base = inStrm->time_base;
AVRational frameRate = {25, 1};
if (inStrm->r_frame_rate.num && inStrm->r_frame_rate.den
&& (1.0 * inStrm->r_frame_rate.num / inStrm->r_frame_rate.den < 1000.0)) {
frameRate.num = inStrm->r_frame_rate.num;
frameRate.den = inStrm->r_frame_rate.den;
}
outStrm->r_frame_rate = frameRate;
outStrm->codec->time_base = inStrm->codec->time_base;
outStrm->codec->pix_fmt = inStrm->codec->pix_fmt;
outStrm->codec->width = width;
outStrm->codec->height = height;
outStrm->codec->has_b_frames = inStrm->codec->has_b_frames;
if (!outStrm->codec->sample_aspect_ratio.num) {
AVRational r0 = {0, 1};
outStrm->codec->sample_aspect_ratio =
outStrm->sample_aspect_ratio =
inStrm->sample_aspect_ratio.num ? inStrm->sample_aspect_ratio :
inStrm->codec->sample_aspect_ratio.num ?
inStrm->codec->sample_aspect_ratio : r0;
}
avformat_write_header(outCtx, NULL);
filterGraph = avfilter_graph_alloc();
if (!filterGraph) {
printf("could not open filter graph");
return -1;
}
AVFilter *crop = avfilter_get_by_name("crop");
AVFilter *buffer = avfilter_get_by_name("buffer");
AVFilter *buffersink = avfilter_get_by_name("buffersink");
char args[512];
snprintf(args, sizeof(args), "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
width, height, inStrm->codec->pix_fmt,
inStrm->codec->time_base.num, inStrm->codec->time_base.den,
inStrm->codec->sample_aspect_ratio.num, inStrm->codec->sample_aspect_ratio.den);
err = avfilter_graph_create_filter(&buffer_ctx, buffer, NULL, args, NULL, filterGraph);
if (err < 0) {
printf("error initializing buffer filter\n");
return err;
}
err = avfilter_graph_create_filter(&buffersink_ctx, buffersink, NULL, NULL, NULL, filterGraph);
if (err < 0) {
printf("unable to create buffersink filter\n");
return err;
}
snprintf(args, sizeof(args), "%d:%d", width, height);
err = avfilter_graph_create_filter(&crop_ctx, crop, NULL, args, NULL, filterGraph);
if (err < 0) {
printf("error initializing crop filter\n");
return err;
}
err = avfilter_link(buffer_ctx, 0, crop_ctx, 0);
if (err < 0) {
printf("error linking filters\n");
return err;
}
err = avfilter_link(crop_ctx, 0, buffersink_ctx, 0);
if (err < 0) {
printf("error linking filters\n");
return err;
}
err = avfilter_graph_config(filterGraph, NULL);
if (err < 0) {
printf("error configuring the filter graph\n");
return err;
}
printf("filtergraph configured\n");
for (;;) {
AVPacket packet = {0};
av_init_packet(&packet);
err = AVERROR(EAGAIN);
while (AVERROR(EAGAIN) == err)
err = av_read_frame(inCtx, &packet);
if (err < 0) {
if (AVERROR_EOF != err && AVERROR(EIO) != err) {
printf("eof error\n");
return 1;
} else {
break;
}
}
if (packet.stream_index == vs) {
//
// AVPacket pkt_temp_;
// memset(&pkt_temp_, 0, sizeof(pkt_temp_));
// AVPacket *pkt_temp = &pkt_temp_;
//
// *pkt_temp = packet;
//
// int error, got_frame;
// int new_packet = 1;
//
// error = avcodec_decode_video2(inStrm->codec, frame, &got_frame, pkt_temp);
// if(error < 0) {
// LOGE("error %d", error);
// }
//
// // if (error >= 0) {
//
// // push the video data from decoded frame into the filtergraph
// int err = av_buffersrc_write_frame(buffer_ctx, frame);
// if (err < 0) {
// LOGE("error writing frame to buffersrc");
// return -1;
// }
// // pull filtered video from the filtergraph
// for (;;) {
// int err = av_buffersink_get_frame(buffersink_ctx, oframe);
// if (err == AVERROR_EOF || err == AVERROR(EAGAIN))
// break;
// if (err < 0) {
// LOGE("error reading buffer from buffersink");
// return -1;
// }
// }
//
// LOGI("output frame");
err = av_interleaved_write_frame(outCtx, &packet);
if (err < 0) {
printf("error at write frame");
return -1;
}
//}
}
av_free_packet(&packet);
}
av_write_trailer(outCtx);
if (!(outCtx->oformat->flags & AVFMT_NOFILE) && outCtx->pb)
avio_close(outCtx->pb);
avformat_free_context(outCtx);
avformat_close_input(&inCtx);
return 0;
}
It looks like you are looking for vf_crop.
The CropContext contains a x, y, w, and h, which should be what you need to crop a video to a specific width and height.

How to cut video with FFmpeg C API

How can I cut video with FFmpeg C API? From 00:10:00 to 00:20:00 for example.
What functions I need to use?
I convert video using this function:
int convert(char *file) {
AVFrame *frame;
AVPacket inPacket, outPacket;
if(avio_open(&outFormatContext->pb, file, AVIO_FLAG_WRITE) < 0) {
fprintf(stderr, "convert(): cannot open out file\n");
return 0;
}
avformat_write_header(outFormatContext, NULL);
frame = avcodec_alloc_frame();
av_init_packet(&inPacket);
while(av_read_frame(inFormatContext, &inPacket) >= 0) {
if(inPacket.stream_index == inVideoStreamIndex) {
avcodec_decode_video2(inCodecContext, frame, &frameFinished, &inPacket);
if(frameFinished) {
av_init_packet(&outPacket);
avcodec_encode_video2(outCodecContext, &outPacket, frame, &outputed);
if(outputed) {
if (av_write_frame(outFormatContext, &outPacket) != 0) {
fprintf(stderr, "convert(): error while writing video frame\n");
return 0;
}
}
av_free_packet(&outPacket);
}
}
}
av_write_trailer(outFormatContext);
av_free_packet(&inPacket);
return 1;
}
As Wagner Patriota says, "if you just wanna cut the video, you don't need to reencode the video if you want". Here is the code based on ffmpeg remuxing.c example that you don't need to reencode the video.
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
{
AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
tag,
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
int cut_video(double from_seconds, double end_seconds, const char* in_filename, const char* out_filename) {
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
int ret, i;
av_register_all();
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}
av_dump_format(ifmt_ctx, 0, in_filename, 0);
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt = ofmt_ctx->oformat;
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *in_stream = ifmt_ctx->streams[i];
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0) {
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
av_dump_format(ofmt_ctx, 0, out_filename, 1);
if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}
// int indexs[8] = {0};
// int64_t start_from = 8*AV_TIME_BASE;
ret = av_seek_frame(ifmt_ctx, -1, from_seconds*AV_TIME_BASE, AVSEEK_FLAG_ANY);
if (ret < 0) {
fprintf(stderr, "Error seek\n");
goto end;
}
int64_t *dts_start_from = malloc(sizeof(int64_t) * ifmt_ctx->nb_streams);
memset(dts_start_from, 0, sizeof(int64_t) * ifmt_ctx->nb_streams);
int64_t *pts_start_from = malloc(sizeof(int64_t) * ifmt_ctx->nb_streams);
memset(pts_start_from, 0, sizeof(int64_t) * ifmt_ctx->nb_streams);
while (1) {
AVStream *in_stream, *out_stream;
ret = av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];
out_stream = ofmt_ctx->streams[pkt.stream_index];
log_packet(ifmt_ctx, &pkt, "in");
if (av_q2d(in_stream->time_base) * pkt.pts > end_seconds) {
av_free_packet(&pkt);
break;
}
if (dts_start_from[pkt.stream_index] == 0) {
dts_start_from[pkt.stream_index] = pkt.dts;
printf("dts_start_from: %s\n", av_ts2str(dts_start_from[pkt.stream_index]));
}
if (pts_start_from[pkt.stream_index] == 0) {
pts_start_from[pkt.stream_index] = pkt.pts;
printf("pts_start_from: %s\n", av_ts2str(pts_start_from[pkt.stream_index]));
}
/* copy packet */
pkt.pts = av_rescale_q_rnd(pkt.pts - pts_start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
pkt.dts = av_rescale_q_rnd(pkt.dts - dts_start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
if (pkt.pts < 0) {
pkt.pts = 0;
}
if (pkt.dts < 0) {
pkt.dts = 0;
}
pkt.duration = (int)av_rescale_q((int64_t)pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
log_packet(ofmt_ctx, &pkt, "out");
printf("\n");
ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
if (ret < 0) {
fprintf(stderr, "Error muxing packet\n");
break;
}
av_free_packet(&pkt);
}
free(dts_start_from);
free(pts_start_from);
av_write_trailer(ofmt_ctx);
end:
avformat_close_input(&ifmt_ctx);
/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
if (ret < 0 && ret != AVERROR_EOF) {
fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
return 1;
}
return 0;
}
If you just wanna cut the video, you don't need to reencode the video if you want.
So I am supposing you wanna cut and reencode for some reason. So, based on your code:
Observe you must to have access to the video AVStream* structure... I named it as inVideoStream.
int convert_and_cut(char *file, float starttime, float endtime) {
AVFrame *frame;
AVPacket inPacket, outPacket;
if(avio_open(&outFormatContext->pb, file, AVIO_FLAG_WRITE) < 0) {
fprintf(stderr, "convert(): cannot open out file\n");
return 0;
}
// seek to the start time you wish.
// BEGIN
AVRational default_timebase;
default_timebase.num = 1;
default_timebase.den = AV_TIME_BASE;
// suppose you have access to the "inVideoStream" of course
int64_t starttime_int64 = av_rescale_q((int64_t)( starttime * AV_TIME_BASE ), default_timebase, inVideoStream->time_base);
int64_t endtime_int64 = av_rescale_q((int64_t)( endtime * AV_TIME_BASE ), default_timebase, inVideoStream->time_base);
if(avformat_seek_file(inFormatContext, inVideoStreamIndex, INT64_MIN, starttime_int64, INT64_MAX, 0) < 0) {
// error... do something...
return 0; // usually 0 is used for success in C, but I am following your code.
}
avcodec_flush_buffers( inVideoStream->codec );
// END
avformat_write_header(outFormatContext, NULL);
frame = avcodec_alloc_frame();
av_init_packet(&inPacket);
// you used avformat_seek_file() to seek CLOSE to the point you want... in order to give precision to your seek,
// just go on reading the packets and checking the packets PTS (presentation timestamp)
while(av_read_frame(inFormatContext, &inPacket) >= 0) {
if(inPacket.stream_index == inVideoStreamIndex) {
avcodec_decode_video2(inCodecContext, frame, &frameFinished, &inPacket);
// this line guarantees you are getting what you really want.
if(frameFinished && frame->pkt_pts >= starttime_int64 && frame->pkt_pts <= endtime_int64) {
av_init_packet(&outPacket);
avcodec_encode_video2(outCodecContext, &outPacket, frame, &outputed);
if(outputed) {
if (av_write_frame(outFormatContext, &outPacket) != 0) {
fprintf(stderr, "convert(): error while writing video frame\n");
return 0;
}
}
av_free_packet(&outPacket);
}
// exit the loop if you got the frames you want.
if(frame->pkt_pts > endtime_int64) {
break;
}
}
}
av_write_trailer(outFormatContext);
av_free_packet(&inPacket);
return 1;
}
In addition, as ustin says, the code based on ffmpeg remuxing.c example.
add out_stream->time_base = in_stream->time_base; before out_stream->codec->codec_tag = 0;
add int64_t *start_from = NULL; after int stream_mapping_size = 0;
add start_from = av_mallocz_array(stream_mapping_size, sizeof(*start_from)); after stream_mapping = av_mallocz_array(stream_mapping_size, sizeof(*stream_mapping));
add start_from[stream_mapping[i]] = INT16_MIN; after stream_mapping[i] = stream_index++;
add
if (start_from[pkt.stream_index] == INT16_MIN) {
int64_t dts = pkt.dts;
int64_t pts = pkt.pts;
int64_t min_ts = dts > pts ? pts : dts;
start_from[pkt.stream_index] = min_ts < 0 ? 0 : min_ts;
}
After
if (pkt.stream_index >= stream_mapping_size ||
stream_mapping[pkt.stream_index] < 0) {
av_packet_unref(&pkt);
continue;
}
change justin code, about pts/dst set value to this:
pkt.pts = av_rescale_q_rnd(pkt.pts - start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
pkt.dts = av_rescale_q_rnd(pkt.dts - start_from[pkt.stream_index], in_stream->time_base, out_stream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
add free(start_from); before end:
Tips: start/end timestamp like justin says

Resources