Related
I am trying to save all frames from a mp4 video in separate JPG files, I have a code that runs and actually saves something to JPG files but files are not recognized as images and nothing is showing.
Below my full code, I am using Visual Studio 2022 in Windows 11 and FFMPEG 5.1. The function that saves the images is save_frame_as_jpeg which is actually an adaption from the code provided here but changing the use of avcodec_encode_video2 for avcodec_send_frame/avcodec_receive_packet as indicated in the documentation.
I am obiously doing something wrong but cannot quite find it, BTW, I know that a simple command (ffmpeg -i input.mp4 -vf fps=1 vid_%d.png) will do this but I am requiring to do it by code.
Any help is appreciated, thanks in advance!
// FfmpegTests.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#pragma warning(disable : 4996)
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libavfilter/avfilter.h"
#include "libavutil/opt.h"
#include "libavutil/avutil.h"
#include "libavutil/error.h"
#include "libavfilter/buffersrc.h"
#include "libavfilter/buffersink.h"
#include "libswscale/swscale.h"
}
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "swscale.lib")
#include <cstdio>
#include <iostream>
#include <chrono>
#include <thread>
static AVFormatContext* fmt_ctx;
static AVCodecContext* dec_ctx;
AVFilterGraph* filter_graph;
AVFilterContext* buffersrc_ctx;
AVFilterContext* buffersink_ctx;
static int video_stream_index = -1;
const char* filter_descr = "scale=78:24,transpose=cclock";
static int64_t last_pts = AV_NOPTS_VALUE;
static int open_input_file(const char* filename)
{
const AVCodec* dec;
int ret;
if ((ret = avformat_open_input(&fmt_ctx, filename, NULL, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
return ret;
}
if ((ret = avformat_find_stream_info(fmt_ctx, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
return ret;
}
/* select the video stream */
ret = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &dec, 0);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find a video stream in the input file\n");
return ret;
}
video_stream_index = ret;
/* create decoding context */
dec_ctx = avcodec_alloc_context3(dec);
if (!dec_ctx)
return AVERROR(ENOMEM);
avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[video_stream_index]->codecpar);
/* init the video decoder */
if ((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open video decoder\n");
return ret;
}
return 0;
}
static int init_filters(const char* filters_descr)
{
char args[512];
int ret = 0;
const AVFilter* buffersrc = avfilter_get_by_name("buffer");
const AVFilter* buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut* outputs = avfilter_inout_alloc();
AVFilterInOut* inputs = avfilter_inout_alloc();
AVRational time_base = fmt_ctx->streams[video_stream_index]->time_base;
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE };
filter_graph = avfilter_graph_alloc();
if (!outputs || !inputs || !filter_graph) {
ret = AVERROR(ENOMEM);
goto end;
}
/* buffer video source: the decoded frames from the decoder will be inserted here. */
snprintf(args, sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
dec_ctx->width, dec_ctx->height, dec_ctx->pix_fmt,
time_base.num, time_base.den,
dec_ctx->sample_aspect_ratio.num, dec_ctx->sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
args, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
goto end;
}
/* buffer video sink: to terminate the filter chain. */
ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
NULL, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
goto end;
}
ret = av_opt_set_int_list(buffersink_ctx, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output pixel format\n");
goto end;
}
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx;
outputs->pad_idx = 0;
outputs->next = NULL;
inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx;
inputs->pad_idx = 0;
inputs->next = NULL;
if ((ret = avfilter_graph_parse_ptr(filter_graph, filters_descr,
&inputs, &outputs, NULL)) < 0)
goto end;
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
goto end;
end:
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
return ret;
}
static void display_frame(const AVFrame* frame, AVRational time_base)
{
int x, y;
uint8_t* p0, * p;
int64_t delay;
if (frame->pts != AV_NOPTS_VALUE) {
if (last_pts != AV_NOPTS_VALUE) {
/* sleep roughly the right amount of time;
* usleep is in microseconds, just like AV_TIME_BASE. */
AVRational timeBaseQ;
timeBaseQ.num = 1;
timeBaseQ.den = AV_TIME_BASE;
delay = av_rescale_q(frame->pts - last_pts, time_base, timeBaseQ);
if (delay > 0 && delay < 1000000)
std::this_thread::sleep_for(std::chrono::microseconds(delay));
}
last_pts = frame->pts;
}
/* Trivial ASCII grayscale display. */
p0 = frame->data[0];
puts("\033c");
for (y = 0; y < frame->height; y++) {
p = p0;
for (x = 0; x < frame->width; x++)
putchar(" .-+#"[*(p++) / 52]);
putchar('\n');
p0 += frame->linesize[0];
}
fflush(stdout);
}
int save_frame_as_jpeg(AVCodecContext* pCodecCtx, AVFrame* pFrame, int FrameNo) {
int ret = 0;
const AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);
if (!jpegCodec) {
return -1;
}
AVCodecContext* jpegContext = avcodec_alloc_context3(jpegCodec);
if (!jpegContext) {
return -1;
}
jpegContext->pix_fmt = pCodecCtx->pix_fmt;
jpegContext->height = pFrame->height;
jpegContext->width = pFrame->width;
jpegContext->time_base = AVRational{ 1,10 };
ret = avcodec_open2(jpegContext, jpegCodec, NULL);
if (ret < 0) {
return ret;
}
FILE* JPEGFile;
char JPEGFName[256];
AVPacket packet;
packet.data = NULL;
packet.size = 0;
av_init_packet(&packet);
int gotFrame;
ret = avcodec_send_frame(jpegContext, pFrame);
if (ret < 0) {
return ret;
}
ret = avcodec_receive_packet(jpegContext, &packet);
if (ret < 0) {
return ret;
}
sprintf(JPEGFName, "c:\\folder\\dvr-%06d.jpg", FrameNo);
JPEGFile = fopen(JPEGFName, "wb");
fwrite(packet.data, 1, packet.size, JPEGFile);
fclose(JPEGFile);
av_packet_unref(&packet);
avcodec_close(jpegContext);
return 0;
}
int main(int argc, char** argv)
{
AVFrame* frame;
AVFrame* filt_frame;
AVPacket* packet;
int ret;
if (argc != 2) {
fprintf(stderr, "Usage: %s file\n", argv[0]);
exit(1);
}
frame = av_frame_alloc();
filt_frame = av_frame_alloc();
packet = av_packet_alloc();
if (!frame || !filt_frame || !packet) {
fprintf(stderr, "Could not allocate frame or packet\n");
exit(1);
}
if ((ret = open_input_file(argv[1])) < 0)
goto end;
if ((ret = init_filters(filter_descr)) < 0)
goto end;
while (true)
{
if ((ret = av_read_frame(fmt_ctx, packet)) < 0)
break;
if (packet->stream_index == video_stream_index) {
ret = avcodec_send_packet(dec_ctx, packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while sending a packet to the decoder\n");
break;
}
while (ret >= 0)
{
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
break;
}
else if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while receiving a frame from the decoder\n");
goto end;
}
frame->pts = frame->best_effort_timestamp;
/* push the decoded frame into the filtergraph */
if (av_buffersrc_add_frame_flags(buffersrc_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) {
av_log(NULL, AV_LOG_ERROR, "Error while feeding the filtergraph\n");
break;
}
/* pull filtered frames from the filtergraph */
while (1) {
ret = av_buffersink_get_frame(buffersink_ctx, filt_frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
break;
if (ret < 0)
goto end;
display_frame(filt_frame, buffersink_ctx->inputs[0]->time_base);
av_frame_unref(filt_frame);
ret = save_frame_as_jpeg(dec_ctx, frame, dec_ctx->frame_number);
if (ret < 0)
goto end;
}
av_frame_unref(frame);
}
}
av_packet_unref(packet);
}
end:
avfilter_graph_free(&filter_graph);
avcodec_free_context(&dec_ctx);
avformat_close_input(&fmt_ctx);
av_frame_free(&frame);
av_frame_free(&filt_frame);
av_packet_free(&packet);
if (ret < 0 && ret != AVERROR_EOF) {
char errBuf[AV_ERROR_MAX_STRING_SIZE]{0};
int res = av_strerror(ret, errBuf, AV_ERROR_MAX_STRING_SIZE);
fprintf(stderr, "Error: %s\n", errBuf);
exit(1);
}
exit(0);
}
Well nevermind, I just realized I had an error with the specified codec for JPEG decoding, if somenone's facing this issue you have to use:
*const AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_MJPEG);*
instead of:
*const AVCodec* jpegCodec = avcodec_find_encoder(AV_CODEC_ID_JPEG2000);*
and also add this line:
*jpegContext->strict_std_compliance = FF_COMPLIANCE_UNOFFICIAL;*
before the call to avcodec_open2
I use libavformat to encapsulate an h264 stream and an aac stream into an mp4 file which is playable. However, when encapsulated into a ts file, it works fine in the Win10 player, but no audio in the vlc player. When encapsulating, the audio stream is printed, but with fprobe, the audio stream is printed with channel=0. What could be the reason for this?
And h264 source file is no pts.So I caculate it by myself.
ffprobe print
ffmpeg print
Here is my code.
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
static void log_packet(const AVFormatContext* fmt_ctx, const AVPacket* pkt, const char* tag)
{
AVRational* time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
printf("%s num=%d den=%d\n", tag, time_base->num, time_base->den);
printf("%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
tag,
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
}
int main()
{
const char* in_filename_v = "test.h264";
const char* in_filename_a = "aoutput.aac";
const char* out_filename = "lol.ts";
//Video Input AVFormatContext
AVFormatContext* ifmt_ctx_v = NULL;
int ret = avformat_open_input(&ifmt_ctx_v, in_filename_v, 0, 0);
if (ret < 0)
{
fprintf(stderr, "Could not open input_v %s", in_filename_v);
return -1;
}
//Find Video Stream Info
ret = avformat_find_stream_info(ifmt_ctx_v, 0);
if (ret < 0)
{
fprintf(stderr, "Could not find input_v stream info");
return -1;
}
//Audio Input AVFormatContext
AVFormatContext* ifmt_ctx_a = NULL;
ret = avformat_open_input(&ifmt_ctx_a, in_filename_a, 0, 0);
if (ret < 0)
{
fprintf(stderr, "Could not open input_a %s", in_filename_a);
return -1;
}
//Find Audio Stream Info
ret = avformat_find_stream_info(ifmt_ctx_a, 0);
if (ret < 0)
{
fprintf(stderr, "Could not find input_a stream info");
return -1;
}
printf("===========Input Information==========\n");
av_dump_format(ifmt_ctx_v, 0, in_filename_v, 0);
av_dump_format(ifmt_ctx_a, 0, in_filename_a, 0);
printf("======================================\n");
//Output AVFormatContext
AVFormatContext* ofmt_ctx = NULL;
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx)
{
fprintf(stderr, "cannot alloc OutputFromat context!");
ret = AVERROR_UNKNOWN;
return -1;
}
AVOutputFormat* ofmt = ofmt_ctx->oformat;
//Alloc AVSTREAM
int istream_index_v = 0, istream_index_a = 0, ostream_index_v = 0, ostream_index_a = 0;
for (int i = 0; i < ifmt_ctx_v->nb_streams; i++)
{
AVStream* outstream;
AVStream* in_stream = ifmt_ctx_v->streams[i];
AVCodecParameters* in_codecpar = in_stream->codecpar;
if (in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
continue;
outstream = avformat_new_stream(ofmt_ctx, NULL);
if (!outstream)
{
fprintf(stderr, "Failed allocating output stream\n");
return -1;
}
ret = avcodec_parameters_copy(outstream->codecpar, in_codecpar);
if (ret < 0)
{
fprintf(stderr, "Failed to copy codec parameters\n");
return -1;
}
outstream->codecpar->codec_tag = 0;
// Remeber video stream id
istream_index_v = i;
ostream_index_v = 0;
break;
}
for (int i = 0; i < ifmt_ctx_a->nb_streams; i++)
{
AVStream* outstream;
AVStream* in_stream = ifmt_ctx_a->streams[i];
AVCodecParameters* in_codecpar = in_stream->codecpar;
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
continue;
outstream = avformat_new_stream(ofmt_ctx, NULL);
if (!outstream)
{
fprintf(stderr, "Failed allocating output stream\n");
return -1;
}
ret = avcodec_parameters_copy(outstream->codecpar, in_codecpar);
if (ret < 0)
{
fprintf(stderr, "Failed to copy codec parameters\n");
return -1;
}
outstream->codecpar->codec_tag = 0;
// Remeber audio stream id
istream_index_a = i;
ostream_index_a = 1;
break;
}
printf("===========Output Information==========\n");
av_dump_format(ofmt_ctx, 0, out_filename, 1);
printf("======================================\n");
if (!(ofmt->flags & AVFMT_NOFILE))
{
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
fprintf(stderr, "Could not open output file '%s'", out_filename);
return -1;
}
}
//Write file header
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
return -1;
}
//read and write packet
AVPacket* pkt = av_packet_alloc();
if (!pkt)
{
fprintf(stderr, "Could not allocate AVPacket\n");
return -1;
}
while (1)
{
AVStream* in_stream, * outstream;
ret = av_read_frame(ifmt_ctx_v, pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx_v->streams[pkt->stream_index];
if (pkt->stream_index != istream_index_v)
{
av_packet_unref(pkt);
continue;
}
pkt->stream_index = ostream_index_v;
outstream = ofmt_ctx->streams[pkt->stream_index];
// in log info
log_packet(ifmt_ctx_v, pkt, "in");
if (pkt->pts == AV_NOPTS_VALUE)
{
AVRational time_base1 = in_stream->time_base;
//
int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
static int frame_index = 0;
pkt->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(time_base1) * AV_TIME_BASE);
pkt->dts = pkt->pts;
pkt->duration = (double)calc_duration / (double)(av_q2d(time_base1) * AV_TIME_BASE);
frame_index++;
}
// duration between two frames(us)
av_packet_rescale_ts(pkt, in_stream->time_base, outstream->time_base);
pkt->pos = -1;
// out log info
log_packet(ofmt_ctx, pkt, "out");
ret = av_interleaved_write_frame(ofmt_ctx, pkt);
if (ret < 0)
{
fprintf(stderr, "Error muxing packet\n");
break;
}
}
while (1)
{
AVStream* in_stream, * outstream;
ret = av_read_frame(ifmt_ctx_a, pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx_a->streams[pkt->stream_index];
if (pkt->stream_index != istream_index_a)
{
av_packet_unref(pkt);
continue;
}
if (pkt->pts == AV_NOPTS_VALUE)
{
AVRational time_base1 = in_stream->time_base;
// duration between two frames(us)
int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
static int frame_index = 0;
pkt->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(time_base1) * AV_TIME_BASE);
pkt->dts = pkt->pts;
pkt->duration = (double)calc_duration / (double)(av_q2d(time_base1) * AV_TIME_BASE);
frame_index++;
}
// in log info
log_packet(ifmt_ctx_a, pkt, "in");
pkt->stream_index = ostream_index_a;
outstream = ofmt_ctx->streams[pkt->stream_index];
//change timestamp
av_packet_rescale_ts(pkt, in_stream->time_base, outstream->time_base);
pkt->pos = -1;
// out log info
log_packet(ofmt_ctx, pkt, "out");
ret = av_interleaved_write_frame(ofmt_ctx, pkt);
if (ret < 0)
{
fprintf(stderr, "Error muxing packet\n");
break;
}
}
//write file trailer
av_write_trailer(ofmt_ctx);
printf("===========Output Information==========\n");
av_dump_format(ofmt_ctx, 0, out_filename, 1);
printf("======================================\n");
}
DVB Insepctor video
DVB Insepctor audio
Thanks for #aergistal.
The Reason is that av_interleaved_write_frame has buffer limit.I
hadn't thought about this before so I write all video packages
firstly and then write all audio packages.In ts files, at front are
lots of video packages, followed by lots of audio packages, and
finally both packages interleaved.
Because of MPEG-TS is a stream consists of packages , so in a long time the player can't find audio packages resulting no voice.
Here is my new code that can work.
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#define EXTRAL 1
static void log_packet(const AVFormatContext* fmt_ctx, const AVPacket* pkt, const char* tag)
{
AVRational* time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
FILE* fp = fopen("fflog.log", "a+");
char buf[200];
sprintf(buf, "%s num=%d den=%d\n", tag, time_base->num, time_base->den);
for (int i = 0; *(buf + i) != '\0'; i++)
{
fwrite(buf + i, 1, 1, fp);
}
sprintf(buf, "%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
tag,
av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
pkt->stream_index);
for (int i = 0; *(buf + i) != '\0'; i++)
{
fwrite(buf + i, 1, 1, fp);
}
fclose(fp);
}
int main()
{
const char* in_filename_v = "test.h264";
const char* in_filename_a = "aoutput.aac";
const char* out_filename = "lol.ts";
AVFormatContext* ifmt_ctx_v = NULL;
int ret = avformat_open_input(&ifmt_ctx_v, in_filename_v, 0, 0);
if (ret < 0)
{
fprintf(stderr, "Could not open input_v %s", in_filename_v);
return -1;
}
ret = avformat_find_stream_info(ifmt_ctx_v, 0);
if (ret < 0)
{
fprintf(stderr, "Could not find input_v stream info");
return -1;
}
AVFormatContext* ifmt_ctx_a = NULL;
ret = avformat_open_input(&ifmt_ctx_a, in_filename_a, 0, 0);
if (ret < 0)
{
fprintf(stderr, "Could not open input_a %s", in_filename_a);
return -1;
}
ret = avformat_find_stream_info(ifmt_ctx_a, 0);
if (ret < 0)
{
fprintf(stderr, "Could not find input_a stream info");
return -1;
}
#if EXTRAL
printf("===========Input Information==========\n");
av_dump_format(ifmt_ctx_v, 0, in_filename_v, 0);
av_dump_format(ifmt_ctx_a, 0, in_filename_a, 0);
printf("======================================\n");
#endif
AVFormatContext* ofmt_ctx = NULL;
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx)
{
fprintf(stderr, "cannot alloc OutputFromat context!");
ret = AVERROR_UNKNOWN;
return -1;
}
AVOutputFormat* ofmt = ofmt_ctx->oformat;
int istream_index_v = 0, istream_index_a = 0, ostream_index_v = 0, ostream_index_a = 0;
for (int i = 0; i < ifmt_ctx_v->nb_streams; i++)
{
AVStream* outstream;
AVStream* in_stream = ifmt_ctx_v->streams[i];
AVCodecParameters* in_codecpar = in_stream->codecpar;
if (in_codecpar->codec_type != AVMEDIA_TYPE_VIDEO)
continue;
outstream = avformat_new_stream(ofmt_ctx, NULL);
if (!outstream)
{
fprintf(stderr, "Failed allocating output stream\n");
return -1;
}
ret = avcodec_parameters_copy(outstream->codecpar, in_codecpar);
if (ret < 0)
{
fprintf(stderr, "Failed to copy codec parameters\n");
return -1;
}
outstream->codecpar->codec_tag = 0;
istream_index_v = i;
ostream_index_v = 0;
break;
}
for (int i = 0; i < ifmt_ctx_a->nb_streams; i++)
{
AVStream* outstream;
AVStream* in_stream = ifmt_ctx_a->streams[i];
AVCodecParameters* in_codecpar = in_stream->codecpar;
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
continue;
outstream = avformat_new_stream(ofmt_ctx, NULL);
if (!outstream)
{
fprintf(stderr, "Failed allocating output stream\n");
return -1;
}
ret = avcodec_parameters_copy(outstream->codecpar, in_codecpar);
if (ret < 0)
{
fprintf(stderr, "Failed to copy codec parameters\n");
return -1;
}
outstream->codecpar->codec_tag = 0;
istream_index_a = i;
ostream_index_a = 1;
break;
}
#if EXTRAL
printf("===========Output Information==========\n");
av_dump_format(ofmt_ctx, 0, out_filename, 1);
printf("======================================\n");
#endif
if (!(ofmt->flags & AVFMT_NOFILE))
{
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
fprintf(stderr, "Could not open output file '%s'", out_filename);
return -1;
}
}
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
return -1;
}
AVPacket* pkt = av_packet_alloc();
if (!pkt)
{
fprintf(stderr, "Could not allocate AVPacket\n");
return -1;
}
int64_t pts_v = 0, pts_a = 0;
while (1)
{
if (av_compare_ts(pts_a, ifmt_ctx_a->streams[istream_index_a]->time_base, pts_v, ifmt_ctx_v->streams[istream_index_v]->time_base) <= 0)
{
AVStream* in_stream, * outstream;
ret = av_read_frame(ifmt_ctx_a, pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx_a->streams[pkt->stream_index];
if (pkt->stream_index != istream_index_a)
{
av_packet_unref(pkt);
continue;
}
if (pkt->pts == AV_NOPTS_VALUE)
{
AVRational time_base1 = in_stream->time_base;
int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
static int frame_index = 0;
pkt->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(time_base1) * AV_TIME_BASE);
pkt->dts = pkt->pts;
pkt->duration = (double)calc_duration / (double)(av_q2d(time_base1) * AV_TIME_BASE);
frame_index++;
}
pts_a = pkt->pts;
// in log info
log_packet(ifmt_ctx_a, pkt, "in audio");
pkt->stream_index = ostream_index_a;
outstream = ofmt_ctx->streams[pkt->stream_index];
av_packet_rescale_ts(pkt, in_stream->time_base, outstream->time_base);
pkt->pos = -1;
// out log info
log_packet(ofmt_ctx, pkt, "out audio");
ret = av_interleaved_write_frame(ofmt_ctx, pkt);
if (ret < 0)
{
fprintf(stderr, "Error muxing packet\n");
return -1;
}
}
else
{
AVStream* in_stream, * outstream;
ret = av_read_frame(ifmt_ctx_v, pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx_v->streams[pkt->stream_index];
if (pkt->stream_index != istream_index_v)
{
av_packet_unref(pkt);
continue;
}
pkt->stream_index = ostream_index_v;
outstream = ofmt_ctx->streams[pkt->stream_index];
if (pkt->pts == AV_NOPTS_VALUE)
{
AVRational time_base1 = in_stream->time_base;
int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(in_stream->r_frame_rate);
static int frame_index = 0;
pkt->pts = (double)(frame_index * calc_duration) / (double)(av_q2d(time_base1) * AV_TIME_BASE);
pkt->dts = pkt->pts;
pkt->duration = (double)calc_duration / (double)(av_q2d(time_base1) * AV_TIME_BASE);
frame_index++;
}
pts_v = pkt->pts;
// in log info
log_packet(ifmt_ctx_v, pkt, "in video");
av_packet_rescale_ts(pkt, in_stream->time_base, outstream->time_base);
pkt->pos = -1;
// out log info
log_packet(ofmt_ctx, pkt, "out video");
ret = av_interleaved_write_frame(ofmt_ctx, pkt);
if (ret < 0)
{
fprintf(stderr, "Error muxing packet\n");
return -1;
}
}
}
ret = av_write_trailer(ofmt_ctx);
if (ret < 0)
{
fprintf(stderr, "Error av_write_trailer\n");
}
#if EXTRAL
printf("===========Output Information==========\n");
av_dump_format(ofmt_ctx, 0, out_filename, 1);
printf("======================================\n");
#endif
av_packet_free(&pkt);
avformat_close_input(&ifmt_ctx_v);
avformat_close_input(&ifmt_ctx_a);
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
return 0;
}
I have the following code to transcode video which is closely related to the FFMPEG transcoding example.
However it produced broken video as shown:
It seems like the i-frames are decoded correctly, but the p and b frames are out of order? I thought av_interleaved_write_frame() would rectify that for me. It also seems like the libx264 is not creating any extradata which I thought it would.
Could someone please help me work out why?
Many thanks
#include <stdbool.h>
#include <stdio.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#define DEBUG(level, format, ...) fprintf(stderr, format "\n", ##__VA_ARGS__)
#define stringify2(var) #var
#define stringify(var) stringify2(var)
#define RASSERT(cond, ...) do { \
if (!(cond)) { \
DEBUG(LOG_FATAL, __FILE__ ":" stringify(__LINE__) " " "Assertion failed! " #cond ". " __VA_ARGS__); \
abort(); \
} \
} while (0)
#define FFCHECK(ret, func) RASSERT(ret == 0, #func " failed: %s (%d)", av_err2str(ret), ret)
typedef struct decode_context {
AVFormatContext *format;
AVCodecContext *videoCodec;
AVCodecContext *audioCodec;
AVStream *videoStream;
AVStream *audioStream;
} decode_context_t;
typedef struct encode_context {
AVFormatContext *format;
AVCodecContext *videoCodec;
AVCodecContext *audioCodec;
AVStream *videoStream;
AVStream *audioStream;
} encode_context_t;
void open_input(decode_context_t *dec, const char *file) {
int ret;
ret = avformat_open_input(&dec->format, file, NULL, NULL);
FFCHECK(ret, "avformat_open_input()");
ret = avformat_find_stream_info(dec->format, NULL);
FFCHECK(ret, "avformat_find_stream_info()");
for (unsigned int i = 0; i < dec->format->nb_streams; ++i) {
AVStream * stream = dec->format->streams[i];
enum AVMediaType type = stream->codecpar->codec_type;
switch (type) {
case AVMEDIA_TYPE_VIDEO:
if (dec->videoStream)
break;
dec->videoStream = stream;
break;
case AVMEDIA_TYPE_AUDIO:
dec->audioStream = stream;
if (dec->audioStream)
break;
break;
default:
break;
}
}
RASSERT(dec->videoStream != NULL, "Didn't find video stream");
const AVCodec * codec = avcodec_find_decoder(dec->videoStream->codecpar->codec_id);
RASSERT(codec, "Failed to find decoder");
dec->videoCodec = avcodec_alloc_context3(codec);
RASSERT(dec->videoCodec, "avcodec_alloc_context3() failed");
ret = avcodec_parameters_to_context(dec->videoCodec, dec->videoStream->codecpar);
FFCHECK(ret, "avcodec_parameters_to_context()");
dec->videoCodec->framerate = av_guess_frame_rate(dec->format, dec->videoStream, NULL);
ret = avcodec_open2(dec->videoCodec, codec, NULL);
FFCHECK(ret, "avcodec_open2()");
}
void open_output(encode_context_t *enc, const char *file, decode_context_t *dec) {
int ret;
ret = avformat_alloc_output_context2(&enc->format, NULL, NULL, file);
FFCHECK(ret, "avformat_alloc_output_context2()");
enc->videoStream = avformat_new_stream(enc->format, NULL);
RASSERT(enc->videoStream, "avformat_new_stream() failed");
enc->videoStream->id = enc->format->nb_streams - 1;
const AVCodec *codec = avcodec_find_encoder_by_name("libx264");
RASSERT(codec, "Failed to find encoder");
enc->videoCodec = avcodec_alloc_context3(codec);
RASSERT(enc->videoCodec, "avcodec_alloc_context3() failed");
enc->videoCodec->bit_rate = 400000;
enc->videoCodec->width = dec->videoCodec->width;
enc->videoCodec->height = dec->videoCodec->height;
enc->videoCodec->sample_aspect_ratio = dec->videoCodec->sample_aspect_ratio;
enc->videoCodec->time_base = av_inv_q(dec->videoCodec->framerate);
//enc->videoCodec->gop_size = 12;
//enc->videoCodec->max_b_frames = 2;
enc->videoCodec->pix_fmt = dec->videoCodec->pix_fmt;
enc->videoCodec->framerate = dec->videoCodec->framerate;
if (codec->id == AV_CODEC_ID_H264) {
av_opt_set(enc->videoCodec->priv_data, "preset", "slow", 0);
av_opt_set(enc->videoCodec->priv_data, "profile", "high", 0);
av_opt_set(enc->videoCodec->priv_data, "level", "4.1", 0);
}
if (enc->format->flags & AVFMT_GLOBALHEADER)
enc->videoCodec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
ret = avcodec_open2(enc->videoCodec, codec, NULL);
FFCHECK(ret, "avcodec_open2()");
ret = avcodec_parameters_from_context(enc->videoStream->codecpar, enc->videoCodec);
FFCHECK(ret, "avcodec_parameters_from_context()");
//enc->videoStream->time_base = enc->videoCodec->time_base;
//enc->videoStream->codecpar->extradata = enc->videoCodec->extradata;
//enc->videoStream->codecpar->extradata_size = enc->videoCodec->extradata_size;
av_dump_format(enc->format, 0, file, 1);
ret = avio_open(&enc->format->pb, file, AVIO_FLAG_WRITE);
FFCHECK(ret, "avio_open()");
ret = avformat_write_header(enc->format, NULL);
FFCHECK(ret, "avformat_write_header()");
}
int main(int argc, const char * argv[]) {
int ret;
if (argc < 3) {
fprintf(stderr, "%s input output\n", argv[0]);
return 1;
}
decode_context_t dec_ctx = {};
decode_context_t *dec = &dec_ctx;
encode_context_t enc_ctx = {};
encode_context_t *enc = &enc_ctx;
open_input(dec, argv[1]);
open_output(enc, argv[2], dec);
while (true) {
AVPacket *packet = av_packet_alloc();
ret = av_read_frame(dec->format, packet);
if (ret < 0)
break;
if (packet->stream_index == dec->videoStream->index) {
ret = avcodec_send_packet(dec->videoCodec, packet);
if (ret == AVERROR(EAGAIN)) {
AVFrame * frame = av_frame_alloc();
while (true) {
ret = avcodec_receive_frame(dec->videoCodec, frame);
if (ret == AVERROR(EAGAIN))
break;
FFCHECK(ret, "avcodec_receive_frame()");
ret = avcodec_send_frame(enc->videoCodec, frame);
if (ret == AVERROR(EAGAIN)) {
AVPacket *pkt = av_packet_alloc();
while (true) {
ret = avcodec_receive_packet(enc->videoCodec, pkt);
if (ret == AVERROR(EAGAIN))
break;
FFCHECK(ret, "avcodec_receive_packet()");
pkt->stream_index = enc->videoStream->id;
av_packet_rescale_ts(pkt, dec->videoStream->time_base, enc->videoStream->time_base);
ret = av_interleaved_write_frame(enc->format, pkt);
FFCHECK(ret, "av_interleaved_write_frame()");
}
av_packet_free(&pkt);
}
}
av_frame_free(&frame);
} else {
FFCHECK(ret, "avcodec_send_packet()");
}
} else if (packet->stream_index == dec->audioStream->index) {
// Deal with audio
}
av_packet_free(&packet);
}
ret = av_write_trailer(enc->format);
FFCHECK(ret, "av_write_trailer()");
ret = avio_close(enc->format->pb);
FFCHECK(ret, "avio_close()");
// Close some more stuff
return 0;
}
After thinking about this some more, I realised that I wasn't even sure if my decoder was correct. And another thorough inspection of the transcoding example revealed a small difference in behaviour.
Effectively, I was doing this:
while (true) {
ret = avcodec_send_packet(dec->videoCodec, packet);
if (ret != AVERROR(EAGAIN)) {
continue;
}
ret = avcodec_receive_frame(dec->videoCodec, frame);
}
Removing the continue and immediately trying to receive frames from the decoder works much better, and fixed my issue.
while (true) {
ret = avcodec_send_packet(dec->videoCodec, packet);
if (ret != 0)
abort();
ret = avcodec_receive_frame(dec->videoCodec, frame);
}
I also had the same error in the encoding side so I've fixed it there too.
In the following code, I can't figure out what's wrong:
uint8_t *dstData[4];
int dstLinesize[4];
AVPixelFormat convertToPixFmt = AV_PIX_FMT_RGBA;
int ret;
// ...
printf("tmp_frame format: %d (%s) %dx%d\n", tmp_frame->format, av_get_pix_fmt_name((AVPixelFormat)tmp_frame->format), tmp_frame->width, tmp_frame->height);
// The above line prints: tmp_frame format: 23 (nv12) 480x480
int size = av_image_get_buffer_size(convertToPixFmt, tmp_frame->width, tmp_frame->height, 1);
uint8_t *buffer = (uint8_t *) av_malloc(size);
ret = av_image_copy_to_buffer(buffer, size,
(const uint8_t * const *)&tmp_frame->data[i],
(const int *)&tmp_frame->linesize[i], (AVPixelFormat)tmp_frame->format,
tmp_frame->width, tmp_frame->height, 1);
ASSERT(ret >= 0);
ret = av_image_fill_arrays(dstData, dstLinesize, buffer, convertToPixFmt, dest_width, dest_height, 1);
ASSERT(ret >= 0);
ret = sws_scale(
convertContext,
dstData,
dstLinesize,
0,
dest_width,
convertedFrame->data,
convertedFrame->linesize);
printf("sws_scale returns %d\n", ret); // prints: sws_scale returns 0
ASSERT(ret == tmp_frame->height);
// ...
It's part of a code which uses dxva2 to obtain tmp_frame. I inspired the code from hw_decode.c and am sure that there's no mistake in the code. The tmp_frame is properly made in NV12 format. The error occurs just when I call sws_scale and it's:
bad src image pointers
So I don't know how to provide pointers not to get this error and sws_scale may work properly.
Any idea?
I update the question to include my whole code:
static AVBufferRef *hw_device_ctx = NULL;
static enum AVPixelFormat hw_pix_fmt;
static FILE *output_file = NULL;
int main(int argc, char *argv[])
{
AVFormatContext *input_ctx = NULL;
int video_stream, ret;
AVStream *video = NULL;
AVCodecContext *decoder_ctx = NULL;
AVCodec *decoder = NULL;
AVPacket packet;
enum AVHWDeviceType type;
int i;
if (argc < 2)
{
fprintf(stderr, "Usage: %s <input file>\n", argv[0]);
return -1;
}
type = av_hwdevice_find_type_by_name("dxva2");
ASSERT(type != AV_HWDEVICE_TYPE_NONE);
ASSERT(avformat_open_input(&input_ctx, argv[1], NULL, NULL) == 0);
ASSERT(avformat_find_stream_info(input_ctx, NULL) >= 0);
video_stream = av_find_best_stream(input_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, &decoder, 0);
ASSERT(video_stream >= 0);
decoder_ctx = avcodec_alloc_context3(decoder);
ASSERT(decoder_ctx);
video = input_ctx->streams[video_stream];
ASSERT(avcodec_parameters_to_context(decoder_ctx, video->codecpar) >= 0);
ASSERT(av_hwdevice_ctx_create(&hw_device_ctx, type, NULL, NULL, 0) >= 0);
decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
ASSERT(avcodec_open2(decoder_ctx, decoder, NULL) >= 0);
printf("video info: %dx%d\n", decoder_ctx->width, decoder_ctx->height);
AVFrame *frame = av_frame_alloc();
ASSERT(frame);
AVFrame *sw_frame = av_frame_alloc();
ASSERT(sw_frame);
AVFrame* convertedFrame = av_frame_alloc();
ASSERT(convertedFrame);
AVPixelFormat convertToPixFmt = AV_PIX_FMT_RGBA;
//int dest_width = 320, dest_height = 200;
int dest_width = decoder_ctx->width, dest_height = decoder_ctx->height;
SwsContext* convertContext = sws_getContext(decoder_ctx->width, decoder_ctx->height, AV_PIX_FMT_YUV420P,
dest_width, dest_height, convertToPixFmt,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
ASSERT(convertContext);
int convertedFrameAspectBufferSize = avpicture_get_size(convertToPixFmt, dest_width, dest_height);
void *convertedFrameBuffer = av_malloc(convertedFrameAspectBufferSize);
avpicture_fill((AVPicture*)convertedFrame, (uint8_t *)convertedFrameBuffer, convertToPixFmt, dest_width, dest_height);
output_file = fopen("1.out", "w+");
for (int i = 0; /*i < 20*/; i++)
{
ret = av_read_frame(input_ctx, &packet);
if (ret == AVERROR_EOF)
break;
ASSERT(ret >= 0);
if (video_stream != packet.stream_index)
continue;
int ret = avcodec_send_packet(decoder_ctx, &packet);
ASSERT(ret >= 0);
//printf("%p", decoder->hw_configs->hwaccel);
ret = avcodec_receive_frame(decoder_ctx, frame);
if (ret < 0)
printf("%d\t%d\n", i, ret);
AVFrame *tmp_frame;
if (frame->format > 0) // hw enabled
{
ASSERT(av_hwframe_transfer_data(sw_frame, frame, 0) >= 0);
tmp_frame = sw_frame;
}
else
{
tmp_frame = frame;
}
printf("frame format: %d (%s) %dx%d\n", frame->format, av_get_pix_fmt_name((AVPixelFormat)frame->format), frame->width, frame->height);
printf("sw_frame format: %d (%s) %dx%d\n", sw_frame->format, av_get_pix_fmt_name((AVPixelFormat)sw_frame->format), sw_frame->width, sw_frame->height);
printf("tmp_frame format: %d (%s) %dx%d\n", tmp_frame->format, av_get_pix_fmt_name((AVPixelFormat)tmp_frame->format), tmp_frame->width, tmp_frame->height);
/*
video info: 480x480
frame format: 53 (dxva2_vld) 480x480
sw_frame format: 23 (nv12) 480x480
[swscaler # 004cb2c0] bad src image pointers
*/
int size = av_image_get_buffer_size(convertToPixFmt, tmp_frame->width, tmp_frame->height, 1);
uint8_t *buffer = (uint8_t *) av_malloc(size);
ret = av_image_copy_to_buffer(buffer, size,
(const uint8_t * const *)&tmp_frame->data[i],
(const int *)&tmp_frame->linesize[i], (AVPixelFormat)tmp_frame->format,
tmp_frame->width, tmp_frame->height, 1);
ASSERT(ret > 0);
ret = av_image_fill_arrays(dstData, dstLinesize, buffer, convertToPixFmt, dest_width, dest_height, 1);
ASSERT(ret > 0);
ret = sws_scale(
convertContext,
tmp_frame->data,
tmp_frame->linesize,
0,
dest_width,
convertedFrame->data,
convertedFrame->linesize);
printf("sws_scale returns %d\n", ret);
ASSERT(ret == tmp_frame->height);
ret = fwrite(convertedFrame->data, tmp_frame->height * tmp_frame->width, 1, output_file);
ASSERT(ret == 1);
break;
}
av_frame_free(&frame);
av_packet_unref(&packet);
avcodec_free_context(&decoder_ctx);
avformat_close_input(&input_ctx);
av_buffer_unref(&hw_device_ctx);
return 0;
}
I would like to ask a question about ffmpeg when i use encoder (x264).
this is my code :
int
FFVideoEncoder::init(AVCodecID codecId, int bitrate, int fps, int gopSize,
int width, int height, AVPixelFormat format) {
release();
const AVCodec *codec = avcodec_find_encoder(codecId);
m_pCodecCtx = avcodec_alloc_context3(codec);
m_pCodecCtx->width = width;
m_pCodecCtx->height = height;
m_pCodecCtx->pix_fmt = format;
m_pCodecCtx->bit_rate = bitrate;
m_pCodecCtx->thread_count = 5;
m_pCodecCtx->max_b_frames = 0;
m_pCodecCtx->gop_size = gopSize;
m_pCodecCtx->time_base.num = 1;
m_pCodecCtx->time_base.den = fps;
//H.264
if (m_pCodecCtx->codec_id == AV_CODEC_ID_H264) {
// av_dict_set(&opts, "preset", "slow", 0);
av_dict_set(&m_pEncoderOpts, "preset", "superfast", 0);
av_dict_set(&m_pEncoderOpts, "tune", "zerolatency", 0);
m_pCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
m_pCodecCtx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
int ret = avcodec_open2(m_pCodecCtx, m_pCodecCtx->codec, &m_pEncoderOpts);
if (ret == 0) {
LOGI("open avcodec success!");
} else {
LOGE("open avcodec error!");
return -1;
}
return ret;
}
int FFVideoEncoder::encode(const Frame &inFrame, AVPacket *outPacket) {
AVFrame *frame = av_frame_alloc();
// avpicture_fill((AVPicture *) frame, inFrame.getData(), AV_PIX_FMT_YUV420P, inFrame.getWidth(),
// inFrame.getHeight());
av_image_fill_arrays(frame->data, frame->linesize, inFrame.getData(), m_pCodecCtx->pix_fmt,
inFrame.getWidth(), inFrame.getHeight(), 1);
int ret = 0;
ret = avcodec_send_frame(m_pCodecCtx, frame);
if (ret != 0) {
LOGE("send frame error! %s", av_err2str(ret));
} else {
ret = avcodec_receive_packet(m_pCodecCtx, outPacket);
LOGI("extract data size = %d", m_pCodecCtx->extradata_size);
if (ret != 0) {
LOGE("receive packet error! %s", av_err2str(ret));
}
};
av_frame_free(&frame);
return ret;
}
I expect that the AVPacket will carry the pts and dts about this frame.
but in fact, i only can get encoded frame data and size.
//====================================
except this question, i have another quesiont:
x264 docs say that "tune" opts can be set like film、animation and others. but i only can get a normal video when i set "zerolatency" params. When i set others opts, video's bitrate is very low.
Thanks your answer.
This is for simple example to see if it works:
I believe you should set frame->pts beforehand.
Try this:
Set frame->pts = framecount before sending to ret = avcodec_send_frame(m_pCodecCtx, frame)
Add this framecount as a simple counter of frames you send for encode. Increases each time.
Hope that helps.