FFmpeg C API H.264 encoding / MPEG-2 TS streaming problems - C++11

Class prototype is as follows:
#ifndef _FULL_MOTION_VIDEO_STREAM_H_
#define _FULL_MOTION_VIDEO_STREAM_H_
#include <memory>
#include <string>
#ifndef INT64_C
# define INT64_C(c) (c ## LL)
# define UINT64_C(c) (c ## ULL)
#endif
extern "C"
{
#include "libavutil/opt.h"
#include "libavcodec/avcodec.h"
#include "libavutil/channel_layout.h"
#include "libavutil/common.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavformat/avformat.h"
#include <libavutil/timestamp.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>
}
class FMVStream
{
public:
struct OutputStream
{
OutputStream() :
st(0),
next_pts(0),
samples_count(0),
frame(0),
tmpFrame(0),
sws_ctx(0)
{
}
AVStream *st;
/* pts of the next frame that will be generated */
int64_t next_pts;
int samples_count;
AVFrame *frame;
AVFrame *tmpFrame;
struct SwsContext *sws_ctx;
};
///
/// Constructor
///
FMVStream();
///
/// Destructor
///
~FMVStream();
///
/// Frame encoder helper function
///
/// Encodes a raw RGB frame into the transport stream
///
int EncodeFrame(uint8_t* frame);
///
/// Frame width setter
///
void setFrameWidth(int width);
///
/// Frame width getter
///
int getFrameWidth() const;
///
/// Frame height setter
///
void setFrameHeight(int height);
///
/// Frame height getter
///
int getFrameHeight() const;
///
/// Stream address setter
///
void setStreamAddress(const std::string& address);
///
/// Stream address getter
///
std::string getStreamAddress() const;
private:
///
/// Video Stream creation
///
AVStream* initVideoStream(AVFormatContext* oc);
///
/// Raw frame transcoder
///
/// This will convert the raw RGB frame to a raw YUV frame necessary for h.264 encoding
///
void CopyFrameData(uint8_t* src_frame);
///
/// Video frame allocator
///
AVFrame* AllocPicture(PixelFormat pix_fmt, int width, int height);
///
/// Debug print helper function
///
void print_sdp(AVFormatContext **avc, int n);
///
/// Write the frame to the stream
///
int write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt);
///
/// initialize the frame data
///
void initFrame();
// formatting data needed for output streaming and the output container (MPEG 2 TS)
AVOutputFormat* format;
AVFormatContext* format_ctx;
// structure container for our video stream
OutputStream stream;
AVIOContext* io_ctx;
std::string streamFilename;
int frameWidth;
int frameHeight;
};
#endif
That block is the class declaration; the implementation follows.
#include "FullMotionVideoStream.h"
#include <stdexcept>
#include <iostream>
FMVStream::FMVStream()
: format(0),
format_ctx(0),
stream(),
io_ctx(0),
streamFilename("test.mpeg"),
frameWidth(640),
frameHeight(480)
{
// Register all formats and codecs
av_register_all();
avcodec_register_all();
// Init networking
avformat_network_init();
// Find format
this->format = av_guess_format("mpegts", NULL, NULL);
// allocate the AVFormatContext
this->format_ctx = avformat_alloc_context();
if (!this->format_ctx)
{
throw std::runtime_error("avformat_alloc_context failed");
}
this->format_ctx->oformat = this->format;
//sprintf_s(this->format_ctx->filename, sizeof(this->format_ctx->filename), "%s", this->streamFilename.c_str());
this->stream.st = initVideoStream(this->format_ctx);
this->initFrame();
// Allocate AVIOContext
int ret = avio_open(&this->io_ctx, this->streamFilename.c_str(), AVIO_FLAG_WRITE);
if (ret != 0)
{
throw std::runtime_error("avio_open failed");
}
this->format_ctx->pb = this->io_ctx;
// Print some debug info about the format
av_dump_format(this->format_ctx, 0, NULL, 1);
// Begin the output by writing the container header
avformat_write_header(this->format_ctx, NULL);
AVFormatContext* ac[] = { this->format_ctx };
print_sdp(ac, 1);
}
FMVStream::~FMVStream()
{
av_write_trailer(this->format_ctx);
avcodec_close(this->stream.st->codec);
avio_close(io_ctx);
avformat_free_context(this->format_ctx);
av_frame_free(&this->stream.frame);
av_free(this->format);
}
AVFrame* FMVStream::AllocPicture(PixelFormat pix_fmt, int width, int height)
{
// Allocate a frame
AVFrame* frame = av_frame_alloc();
if (frame == nullptr)
{
throw std::runtime_error("avcodec_alloc_frame failed");
}
if (av_image_alloc(frame->data, frame->linesize, width, height, pix_fmt, 1) < 0)
{
throw std::runtime_error("av_image_alloc failed");
}
frame->width = width;
frame->height = height;
frame->format = pix_fmt;
return frame;
}
void FMVStream::print_sdp(AVFormatContext **avc, int n)
{
char sdp[2048];
av_sdp_create(avc, n, sdp, sizeof(sdp));
printf("SDP:\n%s\n", sdp);
fflush(stdout);
}
AVStream* FMVStream::initVideoStream(AVFormatContext *oc)
{
AVStream* st = avformat_new_stream(oc, NULL);
if (st == nullptr)
{
throw std::runtime_error("Could not alloc stream");
}
AVCodec* codec = avcodec_find_encoder(AV_CODEC_ID_H264);
if (codec == nullptr)
{
throw std::runtime_error("couldn't find mpeg2 encoder");
}
st->codec = avcodec_alloc_context3(codec);
st->codec->codec_id = AV_CODEC_ID_H264;
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->bit_rate = 400000;
st->codec->width = this->frameWidth;
st->codec->height = this->frameHeight;
st->time_base.num = 1;
st->time_base.den = 30;
st->codec->framerate.num = 1;
st->codec->framerate.den = 30;
st->codec->max_b_frames = 2;
st->codec->gop_size = 12;
st->codec->pix_fmt = PIX_FMT_YUV420P;
st->id = oc->nb_streams - 1;
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
{
st->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
// option setup for the codec
av_opt_set(st->codec->priv_data, "profile", "baseline", AV_OPT_SEARCH_CHILDREN);
if (avcodec_open2(st->codec, codec, NULL) < 0)
{
throw std::runtime_error("avcodec_open failed");
}
return st;
}
void FMVStream::initFrame()
{
// Allocate a tmp frame for converting our raw RGB data to YUV for encoding
this->stream.tmpFrame = this->AllocPicture(PIX_FMT_RGB24, this->frameWidth, this->frameHeight);
// Allocate a main frame
this->stream.frame = this->AllocPicture(PIX_FMT_YUV420P, this->frameWidth, this->frameHeight);
}
This block attempts to convert the raw RGB frame to the YUV format needed for h.264 encoding.
void FMVStream::CopyFrameData(uint8_t* data)
{
// fill image with our raw RGB data
//avpicture_alloc((AVPicture*)this->stream.tmpFrame, PIX_FMT_RGB24, this->stream.st->codec->width, this->stream.st->codec->height);
int numBytes = avpicture_get_size(PIX_FMT_RGB24, this->stream.st->codec->width, this->stream.st->codec->height);
uint8_t* buffer = (uint8_t*) av_malloc(numBytes * sizeof(uint8_t));
avpicture_fill((AVPicture*)this->stream.tmpFrame, buffer, PIX_FMT_RGB24, this->stream.st->codec->width, this->stream.st->codec->height);
for (int y = 0; y < this->stream.st->codec->height; y++)
{
for (int x = 0; x < this->stream.st->codec->width; x++)
{
int offset = 3 * (x + y * this->stream.st->codec->width);
this->stream.tmpFrame->data[0][offset + 0] = data[x + y * this->stream.st->codec->width]; // R
this->stream.tmpFrame->data[0][offset + 1] = data[x + y * this->stream.st->codec->width + 1]; // G
this->stream.tmpFrame->data[0][offset + 2] = data[x + y * this->stream.st->codec->width + 2]; // B
}
}
// convert the RGB frame to a YUV frame using the sws Context
this->stream.sws_ctx = sws_getContext(this->stream.st->codec->width, this->stream.st->codec->height, PIX_FMT_RGB32, this->stream.st->codec->width, this->stream.st->codec->height, PIX_FMT_YUV420P, SWS_FAST_BILINEAR, NULL, NULL, NULL);
// use the scale function to transcode this raw frame to the correct type
sws_scale(this->stream.sws_ctx, this->stream.tmpFrame->data, this->stream.tmpFrame->linesize, 0, this->stream.st->codec->height, this->stream.frame->data, this->stream.frame->linesize);
}
This is the block that encodes the raw data to h.264 and then sends it out over the MPEG-2 transport stream. I believe the problem lies within this block. I can put a breakpoint in my write_frame block and see that frames are being written; however, opening the resulting file in VLC results in a blank video. The file is approximately 2 MB.
int FMVStream::EncodeFrame(uint8_t* data)
{
AVCodecContext* c = this->stream.st->codec;
AVRational one;
one.den = one.num = 1;
// check to see if we want to keep writing frames we can probably change this to a toggle switch
if (av_compare_ts(this->stream.next_pts, this->stream.st->codec->time_base, 10, one) >= 0)
{
this->stream.frame = nullptr;
}
else
{
// Convert and load the frame data into the AVFrame struct
CopyFrameData(data);
}
// setup the timestamp stepping
AVPacket pkt = { 0 };
av_init_packet(&pkt);
this->stream.frame->pts = (int64_t)((1.0 / this->stream.st->codec->framerate.den) * 90000.0 * this->stream.next_pts++);
int gotPacket, out_size, ret;
out_size = avcodec_encode_video2(c, &pkt, this->stream.frame, &gotPacket);
if (gotPacket == 1)
{
ret = write_frame(this->format_ctx, &c->time_base, this->stream.st, &pkt);
}
else
{
ret = 0;
}
if (ret < 0)
{
std::cerr << "Error writing video frame" << std::endl;
}
av_free_packet(&pkt);
return ((this->stream.frame != nullptr) || gotPacket) ? 0 : 1;
}
int FMVStream::write_frame(AVFormatContext *fmt_ctx, const AVRational *time_base, AVStream *st, AVPacket *pkt)
{
/* rescale output packet timestamp values from codec to stream timebase */
av_packet_rescale_ts(pkt, *time_base, st->time_base);
pkt->stream_index = st->index;
return av_interleaved_write_frame(fmt_ctx, pkt);
}
void FMVStream::setFrameWidth(const int width)
{
this->frameWidth = width;
}
int FMVStream::getFrameWidth() const
{
return this->frameWidth;
}
void FMVStream::setFrameHeight(const int height)
{
this->frameHeight = height;
}
int FMVStream::getFrameHeight() const
{
return this->frameHeight;
}
void FMVStream::setStreamAddress(const std::string& address)
{
this->streamFilename = address;
}
std::string FMVStream::getStreamAddress() const
{
return this->streamFilename;
}
Here is the main function.
#include "FullMotionVideoStream.h"
#include <iostream>
#include <thread>
#include <chrono>
#include <cstring>
int main(int argc, char** argv)
{
FMVStream* fmv = new FMVStream;
fmv->setFrameWidth(640);
fmv->setFrameHeight(480);
std::cout << "Streaming Address: " << fmv->getStreamAddress() << std::endl;
// create our alternating frame of black and white to test the streaming functionality
uint8_t white[640 * 480 * sizeof(uint8_t) * 3];
uint8_t black[640 * 480 * sizeof(uint8_t) * 3];
std::memset(white, 255, 640 * 480 * sizeof(uint8_t) * 3);
std::memset(black, 0, 640 * 480 * sizeof(uint8_t)* 3);
for (auto i = 0; i < 100; i++)
{
auto ret = fmv->EncodeFrame(white);
if (ret != 0)
{
std::cerr << "There was a problem encoding the frame: " << i << std::endl;
}
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
for (auto i = 0; i < 100; i++)
{
auto ret = fmv->EncodeFrame(black);
if (ret != 0)
{
std::cerr << "There was a problem encoding the frame: " << i << std::endl;
}
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
delete fmv;
}
Here is the resulting output from the console / my print_sdp function.
[libx264 # 000000ac95f58440] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2
AVX FMA3 AVX2 LZCNT BMI2
[libx264 # 000000ac95f58440] profile Constrained Baseline, level 3.0
Output #0, mpegts, to '(null)':
Stream #0:0: Video: h264 (libx264), yuv420p, 640x480, q=-1--1, 400 kb/s, 30
tbn
SDP:
v=0
o=- 0 0 IN IP4 127.0.0.1
s=No Name
t=0 0
a=tool:libavformat 56.23.104
m=video 0 RTP/AVP 96
b=AS:400
a=rtpmap:96 H264/90000
a=fmtp:96 packetization-mode=1
a=control:streamid=0
Streaming Address: test.mpeg
[libx264 # 000000ac95f58440] frame I:45 Avg QP: 0.51 size: 1315
[libx264 # 000000ac95f58440] frame P:136 Avg QP: 0.29 size: 182
[libx264 # 000000ac95f58440] mb I I16..4: 99.7% 0.0% 0.3%
[libx264 # 000000ac95f58440] mb P I16..4: 0.1% 0.0% 0.1% P16..4: 0.1% 0.0
% 0.0% 0.0% 0.0% skip:99.7%
[libx264 # 000000ac95f58440] final ratefactor: -68.99
[libx264 # 000000ac95f58440] coded y,uvDC,uvAC intra: 0.5% 0.5% 0.5% inter: 0.0%
0.1% 0.1%
[libx264 # 000000ac95f58440] i16 v,h,dc,p: 96% 0% 3% 0%
[libx264 # 000000ac95f58440] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 1% 10% 85% 0% 3%
0% 1% 0% 0%
[libx264 # 000000ac95f58440] i8c dc,h,v,p: 100% 0% 0% 0%
[libx264 # 000000ac95f58440] ref P L0: 46.8% 25.2% 28.0%
[libx264 # 000000ac95f58440] kb/s:0.03
I know there are probably many issues with this program; I am very new to FFmpeg and multimedia programming in general, and I've used many pieces of code found through searching Google/Stack Overflow to get to this point. The file has a reasonable size, but the fact that it comes up with a length of 0.04 tells me that my timestamping must be broken between the frames/packets, and I am unsure how to fix this issue.
I tried inspecting the file with ffmpeg.exe, using ffmpeg -i and outputting to a regular TS. It seems my code works better than I originally thought; however, I am simply trying to output a bunch of all-white frames.
ffmpeg -i test.mpeg test.ts
ffmpeg version N-70125-g6c9537b Copyright (c) 2000-2015 the FFmpeg developers
built with gcc 4.9.2 (GCC)
configuration: --disable-static --enable-shared --enable-gpl --enable-version3
--disable-w32threads --enable-avisynth --enable-bzlib --enable-fontconfig --ena
ble-frei0r --enable-gnutls --enable-iconv --enable-libass --enable-libbluray --e
nable-libbs2b --enable-libcaca --enable-libfreetype --enable-libgme --enable-lib
gsm --enable-libilbc --enable-libmodplug --enable-libmp3lame --enable-libopencor
e-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enabl
e-librtmp --enable-libschroedinger --enable-libsoxr --enable-libspeex --enable-l
ibtheora --enable-libtwolame --enable-libvidstab --enable-libvo-aacenc --enable-
libvo-amrwbenc --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-l
ibwebp --enable-libx264 --enable-libx265 --enable-libxavs --enable-libxvid --ena
ble-lzma --enable-decklink --enable-zlib
libavutil 54. 19.100 / 54. 19.100
libavcodec 56. 26.100 / 56. 26.100
libavformat 56. 23.104 / 56. 23.104
libavdevice 56. 4.100 / 56. 4.100
libavfilter 5. 11.101 / 5. 11.101
libswscale 3. 1.101 / 3. 1.101
libswresample 1. 1.100 / 1. 1.100
libpostproc 53. 3.100 / 53. 3.100
Input #0, mpegts, from 'test.mpeg':
Duration: 00:00:00.04, start: 0.000000, bitrate: 24026 kb/s
Program 1
Metadata:
service_name : Service01
service_provider: FFmpeg
Stream #0:0[0x100]: Video: h264 (Constrained Baseline) ([27][0][0][0] / 0x00
1B), yuv420p, 640x480, 25 fps, 25 tbr, 90k tbn, 50 tbc
File 'test.ts' already exists. Overwrite ? [y/N] y
Output #0, mpegts, to 'test.ts':
Metadata:
encoder : Lavf56.23.104
Stream #0:0: Video: mpeg2video, yuv420p, 640x480, q=2-31, 200 kb/s, 25 fps,
90k tbn, 25 tbc
Metadata:
encoder : Lavc56.26.100 mpeg2video
Stream mapping:
Stream #0:0 -> #0:0 (h264 (native) -> mpeg2video (native))
Press [q] to stop, [?] for help
frame= 3 fps=0.0 q=2.0 Lsize= 9kB time=00:00:00.08 bitrate= 883.6kbits/
s dup=0 drop=178
video:7kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing ove
rhead: 22.450111%

avpicture_fill does not do what you think it does. It does not fill the picture using data from ptr as the source; it fills the picture using ptr as the destination. So basically, you are clearing the image before you encode it.
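One minimal way around this, assuming the incoming data pointer really is tightly packed RGB24 with no per-row padding, is to attach the caller's buffer to tmpFrame directly instead of a freshly allocated (and therefore blank) one:
// Sketch: point tmpFrame at the caller's RGB data rather than a new buffer
// (assumes packed RGB24 with no per-row padding).
avpicture_fill((AVPicture*)this->stream.tmpFrame, data, PIX_FMT_RGB24,
               this->stream.st->codec->width, this->stream.st->codec->height);
// ...then run sws_scale() from tmpFrame into stream.frame as before,
// with the SwsContext created for PIX_FMT_RGB24 (not PIX_FMT_RGB32).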

In your call to av_packet_rescale_ts(pkt, *time_base, st->time_base);
you are rescaling from AVCodecContext::time_base,
but you only ever set AVCodecContext::framerate instead.
st->time_base.num = 1;
st->time_base.den = 30;
st->codec->framerate.num = 1;
st->codec->framerate.den = 30;
change to:
st->time_base.num = 1;
st->time_base.den = 30;
st->codec->time_base = st->time_base;
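With the codec time base set like this, the hand-rolled 90 kHz pts computation in EncodeFrame is also no longer needed; a minimal sketch of the simpler form, assuming the fix above is applied (not the poster's original code):
// The frame pts can simply count frames in codec time-base ticks (1/30 s each)...
this->stream.frame->pts = this->stream.next_pts++;   // 0, 1, 2, ...
// ...and write_frame() converts those ticks to the muxer's 90 kHz stream
// time base via av_packet_rescale_ts(pkt, c->time_base, st->time_base).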

Related

Blurry picture when decoding h264 mpegts udp stream with ffmpeg

I'm using ffmpeg to read a UDP stream (it contains only video) and to decode frames, which I then want to encode again, but during the decoding or the demuxing I get blurry pictures, especially in the lower part.
I have a video player that also uses ffmpeg and displays the video perfectly; I have looked at that code, but I don't see any differences.
In the log I see things like
Invalid NAL unit 8, skipping.
nal_unit_type: 1(Coded slice of a non-IDR picture), nal_ref_idc: 3
Invalid NAL unit 7, skipping.
bytestream overread td
error while decoding MB 109 49, bytestream td
The main parts of the code look like this:
av_register_all();
AVFormatContext *fmt_ctx = 0;
AVDictionary *options = 0;
av_dict_set(&options, "analyzeduration", "500000", NULL);
av_dict_set(&options, "probesize", "500000", NULL);
char* url = "udp://239.0.0.3:8081";
avformat_open_input(&fmt_ctx, url, 0, &options);
avformat_find_stream_info(fmt_ctx, &options);
int nRet = 0;
av_dump_format(fmt_ctx, 0, url, 0);
AVStream *pStream = fmt_ctx->streams[0];
AVCodecID nCodecid = pStream->codec->codec_id;
AVCodec* pCodec = avcodec_find_decoder(nCodecid);
AVCodecContext* pCodecCtx = pStream->codec;
nRet = avcodec_open2(pCodecCtx, pCodec, NULL);
int nInH = pStream->codec->height;
int nInW = pStream->codec->width;
int nOutW = nInW / 4;
int nOutH = nInH / 4;
SwsContext* pSwsCtx = sws_getContext(nInW, nInH, AV_PIX_FMT_YUV420P,
nOutW, nOutH, AV_PIX_FMT_RGB24,
SWS_BICUBIC, NULL, NULL, NULL);
m_pFilmWdg->m_img = QImage(nOutW, nOutH, QImage::Format_RGB888);
int linesizes[4];
av_image_fill_linesizes(linesizes, AV_PIX_FMT_RGB24, nOutW);
for (;;)
{
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
nRet = av_read_frame(fmt_ctx, &pkt);
nRet = avcodec_send_packet(pCodecCtx, &pkt);
AVFrame* picture = av_frame_alloc();
nRet = avcodec_receive_frame(pCodecCtx, picture);
if (AVERROR(EAGAIN) == nRet)
continue;
uint8_t* p[] = { m_pFilmWdg->m_img.bits() };
nRet = sws_scale(pSwsCtx, picture->data, picture->linesize, 0, nInH, p, linesizes);
av_packet_unref(&pkt);
av_frame_free(&picture);
m_pFilmWdg->update();
}
I found the problem after a lot of debugging. The issue was that UDP packets were lost or damaged while the decoding was taking place. I split the loop into two loops running in different threads: one
DemuxLoop and one DecodeLoop, something like
void VideoDecode::DemuxLoop()
{
..
int nRet = av_read_frame(fmt_ctx, &pkt);
put in queue
}
void VideoDecode::DecodeLoop()
{
...
pick from queue
int nRet = avcodec_send_packet(pCodecCtx, &pkt);
AVFrame* picture = av_frame_alloc();
nRet = avcodec_receive_frame(pCodecCtx, picture);
...
}
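The queue itself is not shown; below is a minimal sketch of a thread-safe packet queue that could sit between the two loops (the PacketQueue type and its layout are my assumption, not the answerer's code):
#include <queue>
#include <mutex>
#include <condition_variable>

// Hypothetical helper: DemuxLoop pushes packets, DecodeLoop pops them.
struct PacketQueue
{
    std::queue<AVPacket> q;
    std::mutex m;
    std::condition_variable cv;

    // The demuxer hands over ownership of pkt's reference and must not
    // touch or unref it after pushing.
    void push(const AVPacket &pkt)
    {
        std::lock_guard<std::mutex> lock(m);
        q.push(pkt);
        cv.notify_one();
    }

    // Blocks until a packet is available; the decoder must av_packet_unref() it.
    AVPacket pop()
    {
        std::unique_lock<std::mutex> lock(m);
        cv.wait(lock, [this] { return !q.empty(); });
        AVPacket pkt = q.front();
        q.pop();
        return pkt;
    }
};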

x264 encoding timebase calculation

I have yuv frames that I am converting to h264 frames using libx264 successfully. I know the time difference between each frame. Then I use the following ffmpeg command to convert h264 video file to mp4:
ffmpeg.exe -i frames.h264 -c:v copy -f mp4 frames.mp4
This works. However, I can't figure out how to set the variable frame rate properly, and the movie plays too fast. I don't have a fixed frame rate; otherwise I would set that and the resulting mp4 would have the correct duration.
I have the time difference between each frame in milliseconds. How can I set the timebase and pts so that when I convert the h264 file to mp4, it has the correct duration?
bool X264Encoder::init(int width, int height,
AVRational &timebase, // what should timebase be?
int fps)
{
x264_param_t param;
if( x264_param_default_preset( &param, "medium", NULL ) < 0 )
{ return false; }
/* Configure non-default params */
param.i_bitdepth = 8;
param.i_csp = X264_CSP_I420;
param.i_width = width;
param.i_height = height;
param.b_vfr_input = 0;
param.b_repeat_headers = 1;
param.b_annexb = 1;
if(0) // don't have fixed frame rate, can't use it
{
param.b_vfr_input = 0;
param.i_fps_num = 4;
param.i_fps_den = 1;
}
else
{
param.b_vfr_input = 1;
param.i_timebase_num = timebase.den;
param.i_timebase_den = timebase.num;
}
if( x264_param_apply_profile(&param, "high" ) < 0 )
{ return false; }
m_pic_in = new x264_picture_t;
x264_picture_init(m_pic_in);
m_pic_in->img.i_csp = X264_CSP_I420;
m_pic_in->img.i_plane = 3;
m_encoder = x264_encoder_open(&param);
}
//This is called for each frame
void X264Encoder::encode(uint8_t *plane[4], int64_t pts) // what should pts be?
{
m_pic_in->img.plane[0] = plane[0]; // Y frame
m_pic_in->img.i_stride[0] = m_width;
m_pic_in->img.plane[1] = plane[1]; // U frame
m_pic_in->img.i_stride[1] = m_width;
m_pic_in->img.plane[2] = plane[2]; // V frame
m_pic_in->img.i_stride[2] = m_width;
x264_picture_t pic_out;
m_pic_in->i_pts = pts;
int frame_size = x264_encoder_encode(m_encoder, &m_nals, &m_i_nals, m_pic_in, &pic_out);
if(frame_size <= 0)
{
return;
}
writeH264Frame(m_nals->p_payload, frame_size); // basically same as fwrite
}
In general, the timebase is the unit of your frame timestamps, i.e. if your first frames have deltas of 40 ms and 33 ms, then you set the timebase to 1/1000 and provide pts = 0, 40, 73. But the raw H.264 (Annex B) format doesn't carry timestamp information, so it is not enough to save VFR video. You need to provide a timestamp file when converting to mp4, or save directly to mp4 while you still have the pts/dts information of the encoded frames.
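For example, with the millisecond deltas mentioned in the question, a timebase of 1/1000 makes pts simply the running sum of those deltas (the frames container and its delta_ms field are illustrative, not part of the poster's code):
AVRational timebase = { 1, 1000 };            // one tick = 1 ms
encoder.init(width, height, timebase, 0);

int64_t pts = 0;
for (auto &f : frames)                        // hypothetical list of frames with ms deltas
{
    encoder.encode(f.planes, pts);            // pts = 0, 40, 73, ...
    pts += f.delta_ms;                        // time until the next frame
}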

FFmpeg - MJPEG decoding gives inconsistent values

I have a set of JPEG frames which I am muxing into an avi, which gives me a mjpeg video. This is the command I run on the console:
ffmpeg -y -start_number 0 -i %06d.JPEG -codec copy vid.avi
When I try to demux the video using ffmpeg C api, I get frames which are slightly different in values. Demuxing code looks something like this:
AVFormatContext* fmt_ctx = NULL;
AVCodecContext* cdc_ctx = NULL;
AVCodec* vid_cdc = NULL;
int ret;
unsigned int height, width;
....
// read_nframes is the number of frames to read
output_arr = new unsigned char [height * width * 3 *
sizeof(unsigned char) * read_nframes];
avcodec_open2(cdc_ctx, vid_cdc, NULL);
int num_bytes;
uint8_t* buffer = NULL;
const AVPixelFormat out_format = AV_PIX_FMT_RGB24;
num_bytes = av_image_get_buffer_size(out_format, width, height, 1);
buffer = (uint8_t*)av_malloc(num_bytes * sizeof(uint8_t));
AVFrame* vid_frame = NULL;
vid_frame = av_frame_alloc();
AVFrame* conv_frame = NULL;
conv_frame = av_frame_alloc();
av_image_fill_arrays(conv_frame->data, conv_frame->linesize, buffer,
out_format, width, height, 1);
struct SwsContext *sws_ctx = NULL;
sws_ctx = sws_getContext(width, height, cdc_ctx->pix_fmt,
width, height, out_format,
SWS_BILINEAR, NULL,NULL,NULL);
int frame_num = 0;
AVPacket vid_pckt;
while (av_read_frame(fmt_ctx, &vid_pckt) >=0) {
ret = avcodec_send_packet(cdc_ctx, &vid_pckt);
if (ret < 0)
break;
ret = avcodec_receive_frame(cdc_ctx, vid_frame);
if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
break;
if (ret >= 0) {
// convert image from native format to planar GBR
sws_scale(sws_ctx, vid_frame->data,
vid_frame->linesize, 0, vid_frame->height,
conv_frame->data, conv_frame->linesize);
unsigned char* r_ptr = output_arr +
(height * width * sizeof(unsigned char) * 3 * frame_num);
unsigned char* g_ptr = r_ptr + (height * width * sizeof(unsigned char));
unsigned char* b_ptr = g_ptr + (height * width * sizeof(unsigned char));
unsigned int pxl_i = 0;
for (unsigned int r = 0; r < height; ++r) {
uint8_t* avframe_r = conv_frame->data[0] + r*conv_frame->linesize[0];
for (unsigned int c = 0; c < width; ++c) {
r_ptr[pxl_i] = avframe_r[0];
g_ptr[pxl_i] = avframe_r[1];
b_ptr[pxl_i] = avframe_r[2];
avframe_r += 3;
++pxl_i;
}
}
++frame_num;
if (frame_num >= read_nframes)
break;
}
}
...
In my experience around two-thirds of the pixel values are different, each by ±1 (in a range of [0,255]). I am wondering whether this is due to some decoding scheme FFmpeg uses for reading JPEG frames. I tried encoding and decoding PNG frames, and that works perfectly fine. I am sure this has something to do with the libav decoding process, because the MD5 values are consistent between the images and the video:
ffmpeg -i %06d.JPEG -f framemd5 -
ffmpeg -i vid.avi -f framemd5 -
In short, my goal is to get the same pixel-by-pixel values for each JPEG frame as I would have gotten if I had read the JPEG images directly. Here is the stand-alone bitbucket code I used. It includes cmake files to build the code, and a couple of JPEG frames together with the converted avi file to reproduce this problem (pass '--filetype png' to test the png decoding).

Creating a video from images using ffmpeg libav and libx264?

I am trying to create a video from images using the ffmpeg library. The images have a size of 1920x1080 and are supposed to be encoded with H.264 using a .mkv container.
I have come across various problems, thinking I am getting closer to a solution, but this one I am really stuck on. With the settings I use, the first X frames (around 40, depending on what and how many images I use for the video) of my video are not encoded. avcodec_encode_video2 does not return any error (return value is 0) with got_picture_ptr = 0.
The result is a video that actually looks as expected, but the first seconds are weirdly jumpy.
So this is how I create the video file:
// m_codecContext is an instance variable of type AVCodecContext *
// m_formatCtx is an instance variable of type AVFormatContext *
// outputFileName is a valid filename ending with .mkv
AVOutputFormat *oformat = av_guess_format(NULL, outputFileName, NULL);
if (oformat == NULL)
{
oformat = av_guess_format("mpeg", NULL, NULL);
}
// oformat->video_codec is AV_CODEC_ID_H264
AVCodec *codec = avcodec_find_encoder(oformat->video_codec);
m_codecContext = avcodec_alloc_context3(codec);
m_codecContext->codec_id = oformat->video_codec;
m_codecContext->codec_type = AVMEDIA_TYPE_VIDEO;
m_codecContext->gop_size = 30;
m_codecContext->bit_rate = width * height * 4;
m_codecContext->width = width;
m_codecContext->height = height;
m_codecContext->time_base = (AVRational){1,frameRate};
m_codecContext->max_b_frames = 1;
m_codecContext->pix_fmt = AV_PIX_FMT_YUV420P;
m_formatCtx = avformat_alloc_context();
m_formatCtx->oformat = oformat;
m_formatCtx->video_codec_id = oformat->video_codec;
snprintf(m_formatCtx->filename, sizeof(m_formatCtx->filename), "%s", outputFileName);
AVStream *videoStream = avformat_new_stream(m_formatCtx, codec);
if(!videoStream)
{
printf("Could not allocate stream\n");
}
videoStream->codec = m_codecContext;
if(m_formatCtx->oformat->flags & AVFMT_GLOBALHEADER)
{
m_codecContext->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
avcodec_open2(m_codecContext, codec, NULL);
avio_open(&m_formatCtx->pb, outputFileName.toStdString().c_str(), AVIO_FLAG_WRITE);
avformat_write_header(m_formatCtx, NULL);
this is how the frames are added:
void VideoCreator::writeImageToVideo(const QSharedPointer<QImage> &img, int frameIndex)
{
AVFrame *frame = avcodec_alloc_frame();
/* alloc image and output buffer */
int size = m_codecContext->width * m_codecContext->height;
int numBytes = avpicture_get_size(m_codecContext->pix_fmt, m_codecContext->width, m_codecContext->height);
uint8_t *outbuf = (uint8_t *)malloc(numBytes);
uint8_t *picture_buf = (uint8_t *)av_malloc(numBytes);
int ret = av_image_fill_arrays(frame->data, frame->linesize, picture_buf, m_codecContext->pix_fmt, m_codecContext->width, m_codecContext->height, 1);
frame->data[0] = picture_buf;
frame->data[1] = frame->data[0] + size;
frame->data[2] = frame->data[1] + size/4;
frame->linesize[0] = m_codecContext->width;
frame->linesize[1] = m_codecContext->width/2;
frame->linesize[2] = m_codecContext->width/2;
fflush(stdout);
for (int y = 0; y < m_codecContext->height; y++)
{
for (int x = 0; x < m_codecContext->width; x++)
{
unsigned char b = img->bits()[(y * m_codecContext->width + x) * 4 + 0];
unsigned char g = img->bits()[(y * m_codecContext->width + x) * 4 + 1];
unsigned char r = img->bits()[(y * m_codecContext->width + x) * 4 + 2];
unsigned char Y = (0.257 * r) + (0.504 * g) + (0.098 * b) + 16;
frame->data[0][y * frame->linesize[0] + x] = Y;
if (y % 2 == 0 && x % 2 == 0)
{
unsigned char V = (0.439 * r) - (0.368 * g) - (0.071 * b) + 128;
unsigned char U = -(0.148 * r) - (0.291 * g) + (0.439 * b) + 128;
frame->data[1][y/2 * frame->linesize[1] + x/2] = U;
frame->data[2][y/2 * frame->linesize[2] + x/2] = V;
}
}
}
int pts = frameIndex;//(1.0 / 30.0) * 90.0 * frameIndex;
frame->pts = pts;//av_rescale_q(m_codecContext->coded_frame->pts, m_codecContext->time_base, formatCtx->streams[0]->time_base); //(1.0 / 30.0) * 90.0 * frameIndex;
int got_packet_ptr;
AVPacket packet;
av_init_packet(&packet);
packet.data = outbuf;
packet.size = numBytes;
packet.stream_index = formatCtx->streams[0]->index;
packet.flags |= AV_PKT_FLAG_KEY;
packet.pts = packet.dts = pts;
m_codecContext->coded_frame->pts = pts;
ret = avcodec_encode_video2(m_codecContext, &packet, frame, &got_packet_ptr);
if (got_packet_ptr != 0)
{
m_codecContext->coded_frame->pts = pts; // Set the time stamp
if (m_codecContext->coded_frame->pts != (0x8000000000000000LL))
{
pts = av_rescale_q(m_codecContext->coded_frame->pts, m_codecContext->time_base, formatCtx->streams[0]->time_base);
}
packet.pts = pts;
if(m_codecContext->coded_frame->key_frame)
{
packet.flags |= AV_PKT_FLAG_KEY;
}
std::cout << "pts: " << packet.pts << ", dts: " << packet.dts << std::endl;
av_interleaved_write_frame(formatCtx, &packet);
av_free_packet(&packet);
}
free(picture_buf);
free(outbuf);
av_free(frame);
printf("\n");
}
and this is the cleanup:
int numBytes = avpicture_get_size(m_codecContext->pix_fmt, m_codecContext->width, m_codecContext->height);
int got_packet_ptr = 1;
int ret;
// for(; got_packet_ptr != 0; i++)
while (got_packet_ptr)
{
uint8_t *outbuf = (uint8_t *)malloc(numBytes);
AVPacket packet;
av_init_packet(&packet);
packet.data = outbuf;
packet.size = numBytes;
ret = avcodec_encode_video2(m_codecContext, &packet, NULL, &got_packet_ptr);
if (got_packet_ptr)
{
av_interleaved_write_frame(m_formatCtx, &packet);
}
av_free_packet(&packet);
free(outbuf);
}
av_write_trailer(formatCtx);
avcodec_close(m_codecContext);
av_free(m_codecContext);
printf("\n");
I assume it might be tied to the PTS and DTS values, but I have tried EVERYTHING. The frame index seems to make the most sense. The images are correct, I can save them to files without any problems. I am running out of ideas.
I would be incredibly thankful if there was someone out there who knew better than me...
Cheers,
marikaner
UPDATE:
If this is of any help this is the output at the end of the video encoding:
[libx264 # 0x7fffc00028a0] frame I:19 Avg QP:14.24 size:312420
[libx264 # 0x7fffc00028a0] frame P:280 Avg QP:19.16 size:148867
[libx264 # 0x7fffc00028a0] frame B:181 Avg QP:21.31 size: 40540
[libx264 # 0x7fffc00028a0] consecutive B-frames: 24.6% 75.4%
[libx264 # 0x7fffc00028a0] mb I I16..4: 30.9% 45.5% 23.7%
[libx264 # 0x7fffc00028a0] mb P I16..4: 4.7% 9.1% 4.5% P16..4: 23.5% 16.6% 12.6% 0.0% 0.0% skip:28.9%
[libx264 # 0x7fffc00028a0] mb B I16..4: 0.6% 0.5% 0.3% B16..8: 26.7% 11.0% 5.5% direct: 3.9% skip:51.5% L0:39.4% L1:45.0% BI:15.6%
[libx264 # 0x7fffc00028a0] final ratefactor: 19.21
[libx264 # 0x7fffc00028a0] 8x8 transform intra:48.2% inter:47.3%
[libx264 # 0x7fffc00028a0] coded y,uvDC,uvAC intra: 54.9% 53.1% 30.4% inter: 25.4% 13.5% 4.2%
[libx264 # 0x7fffc00028a0] i16 v,h,dc,p: 41% 29% 11% 19%
[libx264 # 0x7fffc00028a0] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 16% 26% 31% 3% 4% 3% 7% 3% 6%
[libx264 # 0x7fffc00028a0] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 30% 26% 14% 4% 5% 4% 7% 4% 7%
[libx264 # 0x7fffc00028a0] i8c dc,h,v,p: 58% 26% 13% 3%
[libx264 # 0x7fffc00028a0] Weighted P-Frames: Y:17.1% UV:3.6%
[libx264 # 0x7fffc00028a0] ref P L0: 63.1% 21.4% 11.4% 4.1% 0.1%
[libx264 # 0x7fffc00028a0] ref B L0: 85.7% 14.3%
[libx264 # 0x7fffc00028a0] kb/s:27478.30
Libav is probably delaying the processing of the initial frames. A good practice is to check for any delayed frames after you have finished processing all frames. This is done as follows:
int i = NUMBER_OF_FRAMES_PREVIOUSLY_ENCODED;
for (; got_packet_ptr; i++)
{
    ret = avcodec_encode_video2(m_codecContext, &packet, NULL, &got_packet_ptr);
    // write the packets to the container after this
}
The point is to pass a NULL pointer in place of the frame to be encoded and to keep doing so until the encoder stops returning packets (got_packet_ptr becomes 0). See this link for the code example - the part under "get the delayed frames".
An easier way out would be to set the number of b frames to be 0.
m_codecContext->max_b_frames = 0;
Let me know if this works fine.
Also, you haven't used the libx264 API at all. You could use the libx264 API for encoding videos; it has a simpler and cleaner syntax, and it offers you more control over the settings as well as improved performance.
For writing the video stream to an mkv container, though, you will still have to use the libav libraries.
At least for me adding
frame->width = m_codecContext->width;
frame->height = m_codecContext->height;
frame->format = m_codecContext->pix_fmt;
made this example code work as expected.

How to encode resampled PCM-audio to AAC using ffmpeg-API when input pcm samples count not equal 1024

I am working on capturing and streaming audio to an RTMP server at the moment. I work under MacOS (in Xcode), so for capturing the audio sample buffer I use the AVFoundation framework. But for encoding and streaming I need to use the ffmpeg API and the libfaac encoder, so the output format must be AAC (to support stream playback on iOS devices).
I have run into the following problem: the audio-capturing device (in my case a Logitech camera) gives me a sample buffer with 512 LPCM samples, and I can select an input sample rate of 16000, 24000, 36000 or 48000 Hz. When I give these 512 samples to the AAC encoder (configured for the appropriate sample rate), I hear slow and jerky audio (it sounds as if there is a piece of silence after each frame).
I figured out (maybe I am wrong) that the libfaac encoder accepts audio frames only with 1024 samples. When I set the input sample rate to 24000 and resample the input sample buffer to 48000 before encoding, I obtain 1024 resampled samples. After encoding these 1024 samples to AAC, I hear proper sound on the output. But my webcam produces 512 samples per buffer for any input sample rate, while the output sample rate must be 48000 Hz. So I need to do resampling in any case, and I will not obtain exactly 1024 samples in the buffer after resampling.
Is there a way to solve this problem within ffmpeg-API functionality?
I would be grateful for any help.
PS:
I guess that I could accumulate resampled buffers until the sample count reaches 1024 and then encode them, but since this is a stream there would be trouble with the resulting timestamps and with other input devices, so such a solution is not suitable.
The current issue came out of the problem described in [question]: How to fill audio AVFrame (ffmpeg) with the data obtained from CMSampleBufferRef (AVFoundation)?
Here is the code with the audio codec configuration (there is also a video stream, but the video works fine):
/*global variables*/
static AVFrame *aframe;
static AVFrame *frame;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVStream *audio_st, *video_st;
Init ()
{
AVCodec *audio_codec, *video_codec;
int ret;
avcodec_register_all();
av_register_all();
avformat_network_init();
avformat_alloc_output_context2(&oc, NULL, "flv", filename);
fmt = oc->oformat;
oc->oformat->video_codec = AV_CODEC_ID_H264;
oc->oformat->audio_codec = AV_CODEC_ID_AAC;
video_st = NULL;
audio_st = NULL;
if (fmt->video_codec != AV_CODEC_ID_NONE)
{ //… /*init video codec*/}
if (fmt->audio_codec != AV_CODEC_ID_NONE) {
audio_codec= avcodec_find_encoder(fmt->audio_codec);
if (!(audio_codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(fmt->audio_codec));
exit(1);
}
audio_st= avformat_new_stream(oc, audio_codec);
if (!audio_st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
audio_st->id = oc->nb_streams-1;
//AAC:
audio_st->codec->sample_fmt = AV_SAMPLE_FMT_S16;
audio_st->codec->bit_rate = 32000;
audio_st->codec->sample_rate = 48000;
audio_st->codec->profile=FF_PROFILE_AAC_LOW;
audio_st->time_base = (AVRational){1, audio_st->codec->sample_rate };
audio_st->codec->channels = 1;
audio_st->codec->channel_layout = AV_CH_LAYOUT_MONO;
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
audio_st->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if (video_st)
{
// …
/*prepare video*/
}
if (audio_st)
{
aframe = avcodec_alloc_frame();
if (!aframe) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
AVCodecContext *c;
int ret;
c = audio_st->codec;
ret = avcodec_open2(c, audio_codec, 0);
if (ret < 0) {
fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
exit(1);
}
//…
}
And resampling and encoding audio:
if (mType == kCMMediaType_Audio)
{
CMSampleTimingInfo timing_info;
CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &timing_info);
double pts=0;
double dts=0;
AVCodecContext *c;
AVPacket pkt = { 0 }; // data and size must be 0;
int got_packet, ret;
av_init_packet(&pkt);
c = audio_st->codec;
CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleBuffer);
NSUInteger channelIndex = 0;
CMBlockBufferRef audioBlockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
size_t audioBlockBufferOffset = (channelIndex * numSamples * sizeof(SInt16));
size_t lengthAtOffset = 0;
size_t totalLength = 0;
SInt16 *samples = NULL;
CMBlockBufferGetDataPointer(audioBlockBuffer, audioBlockBufferOffset, &lengthAtOffset, &totalLength, (char **)(&samples));
const AudioStreamBasicDescription *audioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(CMSampleBufferGetFormatDescription(sampleBuffer));
SwrContext *swr = swr_alloc();
int in_smprt = (int)audioDescription->mSampleRate;
av_opt_set_int(swr, "in_channel_layout", AV_CH_LAYOUT_MONO, 0);
av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout, 0);
av_opt_set_int(swr, "in_channel_count", audioDescription->mChannelsPerFrame, 0);
av_opt_set_int(swr, "out_channel_count", audio_st->codec->channels, 0);
av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout, 0);
av_opt_set_int(swr, "in_sample_rate", audioDescription->mSampleRate,0);
av_opt_set_int(swr, "out_sample_rate", audio_st->codec->sample_rate,0);
av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_sample_fmt(swr, "out_sample_fmt", audio_st->codec->sample_fmt, 0);
swr_init(swr);
uint8_t **input = NULL;
int src_linesize;
int in_samples = (int)numSamples;
ret = av_samples_alloc_array_and_samples(&input, &src_linesize, audioDescription->mChannelsPerFrame,
in_samples, AV_SAMPLE_FMT_S16P, 0);
*input=(uint8_t*)samples;
uint8_t *output=NULL;
int out_samples = av_rescale_rnd(swr_get_delay(swr, in_smprt) +in_samples, (int)audio_st->codec->sample_rate, in_smprt, AV_ROUND_UP);
av_samples_alloc(&output, NULL, audio_st->codec->channels, out_samples, audio_st->codec->sample_fmt, 0);
in_samples = (int)numSamples;
out_samples = swr_convert(swr, &output, out_samples, (const uint8_t **)input, in_samples);
aframe->nb_samples =(int) out_samples;
ret = avcodec_fill_audio_frame(aframe, audio_st->codec->channels, audio_st->codec->sample_fmt,
(uint8_t *)output,
(int) out_samples *
av_get_bytes_per_sample(audio_st->codec->sample_fmt) *
audio_st->codec->channels, 1);
aframe->channel_layout = audio_st->codec->channel_layout;
aframe->channels=audio_st->codec->channels;
aframe->sample_rate= audio_st->codec->sample_rate;
if (timing_info.presentationTimeStamp.timescale!=0)
pts=(double) timing_info.presentationTimeStamp.value/timing_info.presentationTimeStamp.timescale;
aframe->pts=pts*audio_st->time_base.den;
aframe->pts = av_rescale_q(aframe->pts, audio_st->time_base, audio_st->codec->time_base);
ret = avcodec_encode_audio2(c, &pkt, aframe, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
exit(1);
}
swr_free(&swr);
if (got_packet)
{
pkt.stream_index = audio_st->index;
pkt.pts = av_rescale_q(pkt.pts, audio_st->codec->time_base, audio_st->time_base);
pkt.dts = av_rescale_q(pkt.dts, audio_st->codec->time_base, audio_st->time_base);
// Write the compressed frame to the media file.
ret = av_interleaved_write_frame(oc, &pkt);
if (ret != 0) {
fprintf(stderr, "Error while writing audio frame: %s\n",
av_err2str(ret));
exit(1);
}
}
I also ended up here after having a similar problem. I'm reading audio and video from a Blackmagic DeckLink SDI card in 720p50, meaning I had 960 samples per video frame (48k/50fps) that I wanted to encode together with the video. I got really weird audio when only sending 960 samples to aacenc, and it didn't really complain about this fact either.
I started to use AVAudioFifo (see ffmpeg/doc/examples/transcode_aac.c) and kept adding frames to it until I had enough samples to satisfy aacenc. This does mean some samples will play a bit too late, I guess, since pts is set on 1024 samples when the first 960 should really have another value. But it's not really noticeable as far as I can hear/see.
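For reference, a rough sketch of that accumulate-and-drain pattern with AVAudioFifo (the enc_ctx, input_frame and output_frame variables are assumed to exist in the surrounding encoder code):
#include <libavutil/audio_fifo.h>

// FIFO matching the encoder's sample format and channel count.
AVAudioFifo *fifo = av_audio_fifo_alloc(enc_ctx->sample_fmt, enc_ctx->channels, 1);

// Append every captured/resampled frame (e.g. 960 samples) to the FIFO...
av_audio_fifo_write(fifo, (void**)input_frame->data, input_frame->nb_samples);

// ...and drain full encoder-sized frames (1024 samples for AAC) whenever possible.
while (av_audio_fifo_size(fifo) >= enc_ctx->frame_size)
{
    av_audio_fifo_read(fifo, (void**)output_frame->data, enc_ctx->frame_size);
    output_frame->nb_samples = enc_ctx->frame_size;
    // set output_frame->pts here, then hand the frame to avcodec_encode_audio2()
}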
I had a similar problem. I was encoding PCM packets to AAC, and the length of the PCM packets was sometimes smaller than 1024 samples.
If I encoded a packet smaller than 1024 samples, the audio would be slow. On the other hand, if I threw it away, the audio would get faster. From my observation, swr_convert does not do any automatic buffering.
I ended up with a buffering scheme in which packets were filled into a 1024-sample buffer, and the buffer was encoded and cleared every time it became full.
The function to fill buffer is below:
// put frame data into buffer of fixed size
bool ffmpegHelper::putAudioBuffer(const AVFrame *pAvFrameIn, AVFrame **pAvFrameBuffer, AVCodecContext *dec_ctx, int frame_size, int &k0) {
// prepare pFrameAudio
if (!(*pAvFrameBuffer)) {
if (!(*pAvFrameBuffer = av_frame_alloc())) {
av_log(NULL, AV_LOG_ERROR, "Alloc frame failed\n");
return false;
} else {
(*pAvFrameBuffer)->format = dec_ctx->sample_fmt;
(*pAvFrameBuffer)->channels = dec_ctx->channels;
(*pAvFrameBuffer)->sample_rate = dec_ctx->sample_rate;
(*pAvFrameBuffer)->nb_samples = frame_size;
int ret = av_frame_get_buffer(*pAvFrameBuffer, 0);
if (ret < 0) {
char err[500];
av_log(NULL, AV_LOG_ERROR, "get audio buffer failed: %s\n",
av_make_error_string(err, AV_ERROR_MAX_STRING_SIZE, ret));
return false;
}
(*pAvFrameBuffer)->nb_samples = 0;
(*pAvFrameBuffer)->pts = pAvFrameIn->pts;
}
}
// copy input data to buffer
int n_channels = pAvFrameIn->channels;
int new_samples = min(pAvFrameIn->nb_samples - k0, frame_size - (*pAvFrameBuffer)->nb_samples);
int k1 = (*pAvFrameBuffer)->nb_samples;
if (pAvFrameIn->format == AV_SAMPLE_FMT_S16) {
int16_t *d_in = (int16_t *)pAvFrameIn->data[0];
d_in += n_channels * k0;
int16_t *d_out = (int16_t *)(*pAvFrameBuffer)->data[0];
d_out += n_channels * k1;
for (int i = 0; i < new_samples; ++i) {
for (int j = 0; j < pAvFrameIn->channels; ++j) {
*d_out++ = *d_in++;
}
}
} else {
printf("not handled format for audio buffer\n");
return false;
}
(*pAvFrameBuffer)->nb_samples += new_samples;
k0 += new_samples;
return true;
}
And the loop for fill buffer and encode is below:
// transcoding needed
int got_frame;
AVMediaType stream_type;
// decode the packet (do it your self)
decodePacket(packet, dec_ctx, &pAvFrame_, got_frame);
if (enc_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
ret = 0;
// break audio packet down to buffer
if (enc_ctx->frame_size > 0) {
int k = 0;
while (k < pAvFrame_->nb_samples) {
if (!putAudioBuffer(pAvFrame_, &pFrameAudio_, dec_ctx, enc_ctx->frame_size, k))
return false;
if (pFrameAudio_->nb_samples == enc_ctx->frame_size) {
// the buffer is full, encode it (do it yourself)
ret = encodeFrame(pFrameAudio_, stream_index, got_frame, false);
if (ret < 0)
return false;
pFrameAudio_->pts += enc_ctx->frame_size;
pFrameAudio_->nb_samples = 0;
}
}
} else {
ret = encodeFrame(pAvFrame_, stream_index, got_frame, false);
}
} else {
// encode packet directly
ret = encodeFrame(pAvFrame_, stream_index, got_frame, false);
}
You have to break the sample buffer into chunks of 1024 samples. I did this for recording MP3 on Android; for more info follow these links: link1, link2.
If anyone ends up here: I had the same issue, and just as #Mohit pointed out, for AAC each audio frame has to be broken down into 1024-byte chunks.
example:
uint8_t *buffer = (uint8_t*) malloc(1024);
AVFrame *frame = av_frame_alloc();
while((fread(buffer, 1024, 1, fp)) == 1) {
frame->data[0] = buffer;
}
A possible solution is to use the asetnsamples filter, which sets the number of samples for each output audio frame:
https://ffmpeg.org/ffmpeg-filters.html#asetnsamples
You can feed the filter with your input frames, and each resulting output frame will have the desired number of samples. The number of samples set in the filter should be equal to the frame_size of the encoder's AVCodecContext.
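Assuming a filter graph of the form abuffer -> asetnsamples=n=1024:p=0 -> abuffersink has already been built (src_ctx, sink_ctx and encode_audio_frame below are placeholders), the push/pull loop looks roughly like this:
#include <libavfilter/buffersrc.h>
#include <libavfilter/buffersink.h>

// Push decoded frames of arbitrary size into the graph...
av_buffersrc_add_frame(src_ctx, decoded_frame);

// ...and pull out frames that are exactly 1024 samples long.
AVFrame *filt_frame = av_frame_alloc();
while (av_buffersink_get_frame(sink_ctx, filt_frame) >= 0)
{
    // filt_frame->nb_samples now equals the encoder's frame_size (1024)
    encode_audio_frame(filt_frame);           // hypothetical: send to the AAC encoder
    av_frame_unref(filt_frame);
}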

Resources