x264 & libavcodec - windows

After some considerable amount of time while trying to build the ffmpeg static library with the x264 encoder on Windows, I have spent some more time for writing some example with it.
Of course, there are tons of "instructions" on how to build, how to use, bla bla... But, non of them works on Windows. I guess the Linux guys are in better position here. Now, the zillion dollars question is "What's the purpose of all that?". Not only that this is useless on Windows, but I could have bought some third party library that actually works.
If somebody is about to say "But, it works!". I must say, give me a working proof. I don't care about 200x100 at 10fps. I don't need H264 for that. Show me how to compress a single second of 1080i footage. It's H264, it's crossplatform (sounds funny if you ask me), Google is using it (it has to be perfect, right?), some more hipe here...

Firstly don't try and build on windows - especially if you use VS - get it from here
Then the sequence is something like:
// in ctor
ffmpeg::avcodec_register_all();
ffmpeg::avcodec_init();
ffmpeg::av_register_all();
bool createFile(const String &fileName,unsigned int width,unsigned int height,unsigned int fps)
{
close();
pFormatCtx=ffmpeg::avformat_alloc_context();
if(!pFormatCtx)
{
printf("Error allocating format context\n");
return false;
}
pOutputFormat = ffmpeg::av_guess_format( "mp4", filename,NULL);
pFormatCtx->oformat = pOutputFormat;
_snprintf(pFormatCtx->filename, sizeof(pFormatCtx->filename), "%s",filename);
// Add the video stream
pVideoStream = av_new_stream(pFormatCtx,0);
if(!pVideoStream )
{
printf("Could not allocate stream\n");
return false;
}
pCodecCtx=pVideoStream->codec;
pCodecCtx->codec_id = pOutputFormat->video_codec;
pCodecCtx->codec_type = ffmpeg::AVMEDIA_TYPE_VIDEO;
pCodecCtx->width = Width = width;
pCodecCtx->height = Height = height;
pCodecCtx->time_base.num = 1;
pCodecCtx->time_base.den = Fps = fps;
pCodecCtx->pix_fmt = ffmpeg::PIX_FMT_YUV420P;
// needed for x264 to work
pCodecCtx->me_range = 16;
pCodecCtx->max_qdiff = 4;
pCodecCtx->qmin = 10;
pCodecCtx->qmax = 51;
pCodecCtx->qcompress = 0.6;
pCodecCtx->gop_size = 12;
avcodec_thread_init(pCodecCtx, 10);
// some formats want stream headers to be separate
if(pFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
pCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (av_set_parameters(pFormatCtx, NULL) < 0)
{
printf("Invalid output format parameters\n");
return false;
}
ffmpeg::dump_format(pFormatCtx, 0, pFormatCtx->filename, 1);
// open_video
// find the video encoder
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec)
{
printf("codec not found\n");
return false;
}
// open the codec
if (avcodec_open(pCodecCtx, pCodec) < 0)
{
printf("could not open codec\n");
return false;
}
// Allocate memory for output
if(!initOutputBuf())
{
printf("Can't allocate memory for output bitstream\n");
return false;
}
// Allocate the YUV frame
if(!initFrame())
{
printf("Can't init frame\n");
return false;
}
if (url_fopen(&pFormatCtx->pb,pFormatCtx->filename, URL_WRONLY) < 0)
{
printf( "Could not open '%s'\n", pFormatCtx->filename);
return false;
}
av_write_header(pFormatCtx);
return true;
}
Then for each frame
int encodeImage(const QImage &img)
{
if (!convertImage_sws(img)) { // SWS conversion
return false;
}
ppicture->pts=pCodecCtx->frame_number;
//memset(outbuf,0,outbuf_size);
int out_size = ffmpeg::avcodec_encode_video(pCodecCtx,outbuf,outbuf_size,ppicture);
if (out_size > 0) {
ffmpeg::AVPacket pkt;
av_init_packet(&pkt);
if (pCodecCtx->coded_frame->pts != (0x8000000000000000LL))
pkt.pts= av_rescale_q(pCodecCtx->coded_frame->pts, pCodecCtx->time_base, pVideoStream->time_base);
if(pCodecCtx->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.stream_index= pVideoStream->index;
pkt.data= outbuf;
pkt.size= out_size;
if (av_write_frame(pFormatCtx, &pkt) < 0 ) {
return 0;
}
}
return out_size;
}
void close()
{
av_write_trailer(pFormatCtx);
// close_video
avcodec_close(pVideoStream->codec);
freeFrame();
freeOutputBuf();
/* free the streams */
for(int i = 0; i < pFormatCtx->nb_streams; i++)
{
av_freep(&pFormatCtx->streams[i]->codec);
av_freep(&pFormatCtx->streams[i]);
}
// Close file
url_fclose(pFormatCtx->pb);
// Free the stream
av_free(pFormatCtx);
}
bool initFrame()
{
ppicture = ffmpeg::avcodec_alloc_frame();
if(ppicture==0)
return false;
int size = avpicture_get_size(pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
picture_buf = new uint8_t[size];
if(picture_buf==0)
{
av_free(ppicture);
ppicture=0;
return false;
}
// Setup the planes
avpicture_fill((ffmpeg::AVPicture *)ppicture, picture_buf,pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
ppicture->pts = 0;
return true;
}

If you want to encode with libavcodec+libx264 in interlaced mode, use CODEC_FLAG_INTERLACED_DCT. If possible you should use libx264 or the CLI program directly though, it's less work.

Related

Output black when I decode h264 720p with ffmpeg

First, sorry for my english. When I decode h264 720p in ardrone2.0 my output is black and I cant see anything.
I have try to change the value of pCodecCtx->pix_fmt = AV_PIX_FMT_BGR24; to pCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P; and the value of pCodecCtxH264->pix_fmt = AV_PIX_FMT_BGR24; to pCodecCtxH264->pix_fmt = AV_PIX_FMT_YUV420P; but my program crash. What am I doing wrong?. Thank you, see part of my code:
av_register_all();
avcodec_register_all();
avformat_network_init();
// 1.2. Open video file
if(avformat_open_input(&pFormatCtx, drone_addr, NULL, NULL) != 0) {
mexPrintf("No conecct with Drone");
EndVideo();
return;
}
pCodec = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtx = avcodec_alloc_context3(pCodec);
pCodecCtx->pix_fmt = AV_PIX_FMT_BGR24;
pCodecCtx->skip_frame = AVDISCARD_DEFAULT;
pCodecCtx->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
pCodecCtx->err_recognition = AV_EF_CAREFUL;
pCodecCtx->skip_loop_filter = AVDISCARD_DEFAULT;
pCodecCtx->workaround_bugs = FF_BUG_AUTODETECT;
pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtx->codec_id = AV_CODEC_ID_H264;
pCodecCtx->skip_idct = AVDISCARD_DEFAULT;
pCodecCtx->width = 1280;
pCodecCtx->height = 720;
pCodecH264 = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtxH264 = avcodec_alloc_context3(pCodecH264);
pCodecCtxH264->pix_fmt = AV_PIX_FMT_BGR24;
pCodecCtxH264->skip_frame = AVDISCARD_DEFAULT;
pCodecCtxH264->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
pCodecCtxH264->err_recognition = AV_EF_CAREFUL;
pCodecCtxH264->skip_loop_filter = AVDISCARD_DEFAULT;
pCodecCtxH264->workaround_bugs = FF_BUG_AUTODETECT;
pCodecCtxH264->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtxH264->codec_id = AV_CODEC_ID_H264;
pCodecCtxH264->skip_idct = AVDISCARD_DEFAULT;
if(avcodec_open2(pCodecCtxH264, pCodecH264, &optionsDict) < 0)
{
mexPrintf("Error opening H264 codec");
return ;
}
pFrame_BGR24 = av_frame_alloc();
if(pFrame_BGR24 == NULL) {
mexPrintf("Could not allocate pFrame_BGR24\n");
return ;
}
// Determine required buffer size and allocate buffer
buffer_BGR24 =
(uint8_t *)av_mallocz(av_image_get_buffer_size(AV_PIX_FMT_BGR24,
pCodecCtx->width, ((pCodecCtx->height == 720) ? 720 : pCodecCtx->height) *
sizeof(uint8_t)*3,1));
// Assign buffer to image planes
av_image_fill_arrays(pFrame_BGR24->data, pFrame_BGR24->linesize,
buffer_BGR24,AV_PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height,1);
// format conversion context
pConvertCtx_BGR24 = sws_getContext(pCodecCtx->width, pCodecCtx->height,
pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_BGR24,
SWS_BILINEAR | SWS_ACCURATE_RND, 0, 0, 0);
// 1.6. get video frames
pFrame = av_frame_alloc();
av_init_packet(&packet);
packet.data = NULL;
packet.size = 0;
}
//Captura un frame
void video::capture(mxArray *plhs[]) {
if(av_read_frame(pFormatCtx, &packet) < 0){
mexPrintf("Error al leer frame");
return;
}
do {
do {
rest = avcodec_send_packet(pCodecCtxH264, &packet);
} while(rest == AVERROR(EAGAIN));
if(rest == AVERROR_EOF || rest == AVERROR(EINVAL)) {
printf("AVERROR(EAGAIN): %d, AVERROR_EOF: %d,
AVERROR(EINVAL): %d\n", AVERROR(EAGAIN), AVERROR_EOF,
AVERROR(EINVAL));
printf("fe_read_frame: Frame getting error (%d)!\n", rest);
return;
}
rest = avcodec_receive_frame(pCodecCtxH264, pFrame);
} while(rest == AVERROR(EAGAIN));
if(rest == AVERROR_EOF || rest == AVERROR(EINVAL)) {
// An error or EOF occured,index break out and return what
// we have so far.
printf("AVERROR(EAGAIN): %d, AVERROR_EOF: %d, AVERROR(EINVAL): %d\n",
AVERROR(EAGAIN), AVERROR_EOF, AVERROR(EINVAL));
printf("fe_read_frame: EOF or some othere decoding error (%d)!\n",
rest);
return;
}
// 2.1.1. convert frame to GRAYSCALE [or BGR] for OpenCV
sws_scale(pConvertCtx_BGR24, (const uint8_t* const*)pFrame->data,
pFrame->linesize, 0,pCodecCtx->height, pFrame_BGR24->data,
pFrame_BGR24->linesize);
//}
av_packet_unref(&packet);
av_init_packet(&packet);
mwSize dims[] = {(pCodecCtx->width)*((pCodecCtx->height == 720) ? 720 :
pCodecCtx->height)*sizeof(uint8_t)*3};
plhs[0] = mxCreateNumericArray(1,dims,mxUINT8_CLASS, mxREAL);
//plhs[0]=mxCreateDoubleMatrix(pCodecCtx->height,pCodecCtx-
>width,mxREAL);
point=mxGetPr(plhs[0]);
memcpy(point, pFrame_BGR24->data[0],(pCodecCtx->width)*(pCodecCtx-
>height)*sizeof(uint8_t)*3);
}
Go to debugger and see your memcpy. I am not sure if it works for all dimensions that you want. Also, there may be more memory problems. For example, try to see what is the value of buffer_BGR24 and pFrame. I bet that sometimes, they do not return right values. Check them out in the code.

FFMPEG AAC encoding causes audio to be lower in pitch

I built a sample application that encodes AAC (from PortAudio) into a MP4 container (no video stream).
The resulting audio is lower in pitch.
#include "stdafx.h"
#include "TestRecording.h"
#include "libffmpeg.h"
TestRecording::TestRecording()
{
}
TestRecording::~TestRecording()
{
}
struct RecordingContext
{
RecordingContext()
{
formatContext = NULL;
audioStream = NULL;
audioFrame = NULL;
audioFrameframeNumber = 0;
}
libffmpeg::AVFormatContext* formatContext;
libffmpeg::AVStream* audioStream;
libffmpeg::AVFrame* audioFrame;
int audioFrameframeNumber;
};
static int AudioRecordCallback(const void *inputBuffer, void *outputBuffer,
unsigned long framesPerBuffer,
const PaStreamCallbackTimeInfo* timeInfo,
PaStreamCallbackFlags statusFlags,
void *userData)
{
RecordingContext* recordingContext = (RecordingContext*)userData;
libffmpeg::avcodec_fill_audio_frame(recordingContext->audioFrame,
recordingContext->audioFrame->channels,
recordingContext->audioStream->codec->sample_fmt,
static_cast<const unsigned char*>(inputBuffer),
(framesPerBuffer * sizeof(float) * recordingContext->audioFrame->channels),
0);
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
int gotpacket;
int result = avcodec_encode_audio2(recordingContext->audioStream->codec, &pkt, recordingContext->audioFrame, &gotpacket);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't encode the audio frame to acc");
return paContinue;
}
if (gotpacket)
{
pkt.stream_index = recordingContext->audioStream->index;
recordingContext->audioFrameframeNumber++;
// this codec requires no bitstream filter, just send it to the muxer!
result = libffmpeg::av_write_frame(recordingContext->formatContext, &pkt);
if (result < 0)
{
LOG(ERROR) << "Couldn't write the encoded audio frame";
libffmpeg::av_free_packet(&pkt);
return paContinue;
}
libffmpeg::av_free_packet(&pkt);
}
return paContinue;
}
static bool InitializeRecordingContext(RecordingContext* recordingContext)
{
int result = libffmpeg::avformat_alloc_output_context2(&recordingContext->formatContext, NULL, NULL, "C:\\Users\\Paul\\Desktop\\test.mp4");
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't create output format context");
return false;
}
libffmpeg::AVCodec *audioCodec;
audioCodec = libffmpeg::avcodec_find_encoder(libffmpeg::AV_CODEC_ID_AAC);
if (audioCodec == NULL)
{
LOG(ERROR) << "Couldn't find the encoder for AAC";
}
recordingContext->audioStream = libffmpeg::avformat_new_stream(recordingContext->formatContext, audioCodec);
if (!recordingContext->audioStream)
{
LOG(ERROR) << "Couldn't create the audio stream";
return false;
}
recordingContext->audioStream->codec->bit_rate = 64000;
recordingContext->audioStream->codec->sample_fmt = libffmpeg::AV_SAMPLE_FMT_FLTP;
recordingContext->audioStream->codec->sample_rate = 48000;
recordingContext->audioStream->codec->channel_layout = AV_CH_LAYOUT_STEREO;
recordingContext->audioStream->codec->channels = libffmpeg::av_get_channel_layout_nb_channels(recordingContext->audioStream->codec->channel_layout);
recordingContext->audioStream->codecpar->bit_rate = recordingContext->audioStream->codec->bit_rate;
recordingContext->audioStream->codecpar->format = recordingContext->audioStream->codec->sample_fmt;
recordingContext->audioStream->codecpar->sample_rate = recordingContext->audioStream->codec->sample_rate;
recordingContext->audioStream->codecpar->channel_layout = recordingContext->audioStream->codec->channel_layout;
recordingContext->audioStream->codecpar->channels = recordingContext->audioStream->codec->channels;
result = libffmpeg::avcodec_open2(recordingContext->audioStream->codec, audioCodec, NULL);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the audio codec");
return false;
}
// create a new frame to store the audio samples
recordingContext->audioFrame = libffmpeg::av_frame_alloc();
if (!recordingContext->audioFrame)
{
LOG(ERROR) << "Couldn't alloce the output audio frame";
return false;
}
recordingContext->audioFrame->nb_samples = recordingContext->audioStream->codec->frame_size;
recordingContext->audioFrame->channel_layout = recordingContext->audioStream->codec->channel_layout;
recordingContext->audioFrame->channels = recordingContext->audioStream->codec->channels;
recordingContext->audioFrame->format = recordingContext->audioStream->codec->sample_fmt;
recordingContext->audioFrame->sample_rate = recordingContext->audioStream->codec->sample_rate;
result = libffmpeg::av_frame_get_buffer(recordingContext->audioFrame, 0);
if (result < 0)
{
LOG(ERROR) << "Coudln't initialize the output audio frame buffer";
return false;
}
// some formats want video_stream headers to be separate
if (!strcmp(recordingContext->formatContext->oformat->name, "mp4") || !strcmp(recordingContext->formatContext->oformat->name, "mov") || !strcmp(recordingContext->formatContext->oformat->name, "3gp"))
{
recordingContext->audioStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
// open the ouput file
if (!(recordingContext->formatContext->oformat->flags & AVFMT_NOFILE))
{
result = libffmpeg::avio_open(&recordingContext->formatContext->pb, recordingContext->formatContext->filename, AVIO_FLAG_WRITE);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the output file");
return false;
}
}
// write the stream headers
result = libffmpeg::avformat_write_header(recordingContext->formatContext, NULL);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't write the headers to the file");
return false;
}
return true;
}
static bool FinalizeRecordingContext(RecordingContext* recordingContext)
{
int result = 0;
// write the trailing information
if (recordingContext->formatContext->pb)
{
result = libffmpeg::av_write_trailer(recordingContext->formatContext);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't write the trailer information");
return false;
}
}
// close all the codes
for (int i = 0; i < (int)recordingContext->formatContext->nb_streams; i++)
{
result = libffmpeg::avcodec_close(recordingContext->formatContext->streams[i]->codec);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the codec");
return false;
}
}
// close the output file
if (recordingContext->formatContext->pb)
{
if (!(recordingContext->formatContext->oformat->flags & AVFMT_NOFILE))
{
result = libffmpeg::avio_close(recordingContext->formatContext->pb);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the output file");
return false;
}
}
}
// free the format context and all of its data
libffmpeg::avformat_free_context(recordingContext->formatContext);
recordingContext->formatContext = NULL;
recordingContext->audioStream = NULL;
if (recordingContext->audioFrame)
{
libffmpeg::av_frame_free(&recordingContext->audioFrame);
recordingContext->audioFrame = NULL;
}
return true;
}
int TestRecording::Test()
{
PaError result = paNoError;
result = Pa_Initialize();
if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Error initializing audio device framework");
RecordingContext recordingContext;
if (!InitializeRecordingContext(&recordingContext))
{
LOG(ERROR) << "Couldn't start recording file";
return 0;
}
auto defaultDevice = Pa_GetDefaultInputDevice();
auto deviceInfo = Pa_GetDeviceInfo(defaultDevice);
PaStreamParameters inputParameters;
inputParameters.device = defaultDevice;
inputParameters.channelCount = 2;
inputParameters.sampleFormat = paFloat32;
inputParameters.suggestedLatency = deviceInfo->defaultLowInputLatency;
inputParameters.hostApiSpecificStreamInfo = NULL;
PaStream* stream = NULL;
result = Pa_OpenStream(
&stream,
&inputParameters,
NULL,
48000,
1024,
paClipOff,
AudioRecordCallback,
&recordingContext);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the audio stream");
result = Pa_StartStream(stream);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't start the audio stream");
Sleep(1000 * 5);
result = Pa_StopStream(stream);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't stop the audio stream");
if (!FinalizeRecordingContext(&recordingContext)) LOG(ERROR) << "Couldn't stop recording file";
result = Pa_CloseStream(stream);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't stop the audio stream");
return 0;
}
Here is the stdout, in case it helps.
https://gist.github.com/pauldotknopf/9f24a604ce1f8a081aa68da1bf169e98
Why is the audio lower in pitch? I assume I am overlooking a parameter that needs to be configured between PortAudio and FFMPEG. Is there something super obvious that I am missing?

How to understand the given ffplay C code snippet?

The following code snippet is from ffplay:
static int decoder_decode_frame(Decoder *d, AVFrame *frame, AVSubtitle *sub) {
int got_frame = 0;
do {
int ret = -1;
if (d->queue->abort_request)
return -1;
if (!d->packet_pending || d->queue->serial != d->pkt_serial) {
AVPacket pkt;
do {
if (d->queue->nb_packets == 0)
SDL_CondSignal(d->empty_queue_cond);
if (packet_queue_get(d->queue, &pkt, 1, &d->pkt_serial) < 0)
return -1;
if (pkt.data == flush_pkt.data) {
avcodec_flush_buffers(d->avctx);
d->finished = 0;
d->next_pts = d->start_pts;
d->next_pts_tb = d->start_pts_tb;
}
} while (pkt.data == flush_pkt.data || d->queue->serial != d->pkt_serial);
av_free_packet(&d->pkt);
d->pkt_temp = d->pkt = pkt;
d->packet_pending = 1;
}
switch (d->avctx->codec_type) {
case AVMEDIA_TYPE_VIDEO:
ret = avcodec_decode_video2(d->avctx, frame, &got_frame, &d->pkt_temp);
if (got_frame) {
if (decoder_reorder_pts == -1) {
frame->pts = av_frame_get_best_effort_timestamp(frame);
} else if (decoder_reorder_pts) {
frame->pts = frame->pkt_pts;
} else {
frame->pts = frame->pkt_dts;
}
}
break;
case AVMEDIA_TYPE_AUDIO:
ret = avcodec_decode_audio4(d->avctx, frame, &got_frame, &d->pkt_temp);
if (got_frame) {
AVRational tb = (AVRational){1, frame->sample_rate};
if (frame->pts != AV_NOPTS_VALUE)
frame->pts = av_rescale_q(frame->pts, d->avctx->time_base, tb);
else if (frame->pkt_pts != AV_NOPTS_VALUE)
frame->pts = av_rescale_q(frame->pkt_pts, av_codec_get_pkt_timebase(d->avctx), tb);
else if (d->next_pts != AV_NOPTS_VALUE)
frame->pts = av_rescale_q(d->next_pts, d->next_pts_tb, tb);
if (frame->pts != AV_NOPTS_VALUE) {
d->next_pts = frame->pts + frame->nb_samples;
d->next_pts_tb = tb;
}
}
break;
case AVMEDIA_TYPE_SUBTITLE:
ret = avcodec_decode_subtitle2(d->avctx, sub, &got_frame, &d->pkt_temp);
break;
}
if (ret < 0) {
d->packet_pending = 0;
} else {
d->pkt_temp.dts =
d->pkt_temp.pts = AV_NOPTS_VALUE;
if (d->pkt_temp.data) {
if (d->avctx->codec_type != AVMEDIA_TYPE_AUDIO)
ret = d->pkt_temp.size;
d->pkt_temp.data += ret;
d->pkt_temp.size -= ret;
if (d->pkt_temp.size <= 0)
d->packet_pending = 0;
} else {
if (!got_frame) {
d->packet_pending = 0;
d->finished = d->pkt_serial; // FLAG
}
}
}
} while (!got_frame && !d->finished);
return got_frame;
}
It's difficult for me to understand the following code:
d->finished = d->pkt_serial; // FLAG
Can anyone help me ?
Thanks.
See this commit.
Serial has two purposes nowadays: the initial purpose in the commit was to be able to distinguish packets in the packet queue from before and after a seek. The demuxer (input for packet queue) runs in a separate thread. After a seek, we want to flush it, but we don't want to stop the producer thread because overhead. However, we also don't want to flush too few or too many packets. So, the serial field tells us which packets are pre- and post-flush and thus which packets to drop without having to stop the producer thread while we're dropping those packets.
The second purpose is your line of code: it tells us when EOF occurs. Finished is set to the last serial number of a packet from the packet queue used to decode a frame. If that serial number is also the tail of the packet queue (and no more packets are produced), it means we stopped producing packets and decoded the frame belonging to that packet. In other words: end-of-file. Elsewhere, you'll find a test along those lines, and then either playback stops or (if looping is enabled) we seek back to the beginning of the file (i.e. invoke the looping behaviour).
(This write-up was helpfully assisted by several FFmpeg developers on IRC.)

Audio encoding using avcodec_fill_audio_frame() and memory leaks

As a part of encoding decoded audio packets, I'm using avcodec_fill_audio_frame(). I'm passing allocated AVFrame pointer to along with buffer containing the decoded samples and other parameters number of channels, sample format, buffer size. Though the encoding is working fine I'm not able to completely eliminate the memory leaks. I've taken care of most of things but still I'm not able detect the leakage.
Below is the function which I'm using for encoding. Please suggest something.
AudioSample contains decoded data and it is completely managed in different class(free in class destructor). I'm freeing the AVFrame in FFmpegEncoder destructor and AVPacket is freed every time using av_free_packet() with av_packet_destruct enabled. What more do I need to free?
void FfmpegEncoder::WriteAudioSample(AudioSample *audS)
{
int num_audio_frame = 0;
AVCodecContext *c = NULL;
// AVFrame *frame;
AVPacket pkt;
av_init_packet(&pkt);
pkt.destruct = av_destruct_packet;
pkt.data = NULL;
pkt.size = 0;
int ret = 0, got_packet = 0;
c = m_out_aud_strm->codec;
static int64_t aud_pts_in = -1;
if((audS != NULL) && (audS->GetSampleLength() > 0) )
{
int byte_per_sample = av_get_bytes_per_sample(c->sample_fmt);
PRINT_VAL("Byte Per Sample ", byte_per_sample)
m_frame->nb_samples = (audS->GetSampleLength())/(c->channels*av_get_bytes_per_sample(c->sample_fmt));
if(m_frame->nb_samples == c->frame_size)
{
#if 1
if(m_need_resample && (c->channels >= 2))
{
uint8_t * t_buff1 = new uint8_t[audS->GetSampleLength()];
if(t_buff1 != NULL)
{
for(int64_t i = 0; i< m_frame->nb_samples; i++)
{
memcpy(t_buff1 + i*byte_per_sample, (uint8_t*)((uint8_t*)audS->GetAudioSampleData() + i*byte_per_sample*c->channels), byte_per_sample);
memcpy(t_buff1 + (audS->GetSampleLength())/2 + i*byte_per_sample, (uint8_t*)((uint8_t*)audS->GetAudioSampleData() + i*byte_per_sample*c->channels+ byte_per_sample), byte_per_sample);
}
audS->FillAudioSample(t_buff1, audS->GetSampleLength());
delete[] t_buff1;
}
}
#endif
ret = avcodec_fill_audio_frame(m_frame, c->channels, c->sample_fmt, (uint8_t*)audS->GetAudioSampleData(),m_frame->nb_samples*byte_per_sample*c->channels, 0);
//ret = avcodec_fill_audio_frame(&frame, c->channels, c->sample_fmt, t_buff,frame.nb_samples*byte_per_sample*c->channels, 0);
if(ret != 0)
{
PRINT_MSG("Avcodec Fill Audio Failed ")
}
else
{
got_packet = 0;
ret = avcodec_encode_audio2(c, &pkt, m_frame, &got_packet);
if(ret < 0 || got_packet == 0)
{
PRINT_MSG("failed to encode audio ")
}
else
{
PRINT_MSG("Audio Packet Encoded ");
aud_pts_in++;
pkt.pts = aud_pts_in;
pkt.dts = pkt.pts;
pkt.stream_index = m_out_aud_strm->index;
ret = av_interleaved_write_frame(oc, &pkt);
if(ret != 0)
{
PRINT_MSG("Error Write Audio PKT ")
}
else
{
PRINT_MSG("Audio PKT Writen ")
}
}
}
}
avcodec_flush_buffers(c);
// avcodec_free_frame(&frame);
}
av_free_packet(&pkt);
}
Thanks,
Pradeep
//================== SEND AUDIO OUTPUT =======================
void AVOutputStream::sendAudioOutput (AVFrame* inputFrame)
{
AVCodecContext *codecCtx = pOutputAudioStream->codec;
// set source data variables
sourceNumberOfChannels = inputFrame->channels;
sourceChannelLayout = inputFrame->channel_layout;
sourceSampleRate = inputFrame->sample_rate;
_sourceSampleFormat = (AVSampleFormat)inputFrame->format;
sourceNumberOfSamples = inputFrame->nb_samples;
// set destination data variables
destinationNumberOfChannels = codecCtx->channels;
destinationChannelLayout = codecCtx->channel_layout;
destinationSampleRate = codecCtx->sample_rate;
destinationSampleFormat = codecCtx->sample_fmt;//AV_SAMPLE_FMT_FLTP;//EncodecCtx->sample_fmt;
destinationLineSize = 0;
destinationData = NULL;
int returnVal = 0;
if (startDecode == false)
{
startDecode = true;
resamplerCtx = swr_alloc_set_opts(NULL,
destinationChannelLayout,
destinationSampleFormat,
destinationSampleRate,
sourceChannelLayout,
_sourceSampleFormat,
sourceSampleRate,
0,
NULL);
if (resamplerCtx == NULL)
{
std::cout << "Unable to create the resampler context for the audio frame";
isConnected = false;
}
// initialize the resampling context
returnVal = swr_init(resamplerCtx);
if (returnVal < 0)
{
std::cout << "Unable to init the resampler context, error:";
isConnected = false;
}
} //if (startDecode == false)
if (sourceSampleRate != 0)
destinationNumberOfSamples = destinationSampleRate/sourceSampleRate * sourceNumberOfSamples;
// allocate the destination samples buffer
returnVal = av_samples_alloc_array_and_samples(&destinationData,
&destinationLineSize,
destinationNumberOfChannels,
destinationNumberOfSamples,
destinationSampleFormat,
0);
if (returnVal < 0)
{
std::cout << "Unable to allocate destination samples, error";
isConnected = false;
}
// convert to destination format
returnVal = swr_convert(resamplerCtx,
destinationData,
destinationNumberOfSamples,
(const uint8_t **)inputFrame->data, //sourceData,
sourceNumberOfSamples);
if (returnVal < 0)
{
std::cout << "Resampling failed, error \n";
isConnected = false;
}
int bufferSize = av_samples_get_buffer_size(&destinationLineSize,
destinationNumberOfChannels,
destinationNumberOfSamples,
destinationSampleFormat,
0);
//whithout fifo
pOutputAudioFrame = av_frame_alloc();
pOutputAudioFrame->nb_samples = codecCtx->frame_size;//frameNumberOfSamples;
pOutputAudioFrame->format = codecCtx->sample_fmt;
pOutputAudioFrame->channel_layout = codecCtx->channel_layout;
pOutputAudioFrame->channels = codecCtx->channels;
pOutputAudioFrame->sample_rate = codecCtx->sample_rate;
returnVal = avcodec_fill_audio_frame(pOutputAudioFrame,
pOutputAudioFrame->channels,
(AVSampleFormat)pOutputAudioFrame->format,
(const uint8_t *)destinationData[0],
bufferSize,0);
pOutputAudioFrame->pts = inputFrame->pts;
if (returnVal < 0)
{
std::cout << "Unable to fill the audio frame wsampleIndexith captured audio data,error";
isConnected = false;
}
// encode the audio frame, fill a packet for streaming
av_init_packet(&outAudioPacket);
outAudioPacket.data = NULL;
outAudioPacket.size = 0;
outAudioPacket.dts = outAudioPacket.pts = 0;
int gotPacket;
// encoding
returnVal = avcodec_encode_audio2(codecCtx, &outAudioPacket, pOutputAudioFrame, &gotPacket);
// free buffers
av_freep(&destinationData[0]);
av_freep(&destinationData);
av_frame_free(&pOutputAudioFrame);
if (gotPacket)
{
outAudioPacket.stream_index = pOutputAudioStream->index;
outAudioPacket.flags |= AV_PKT_FLAG_KEY;
returnVal = av_interleaved_write_frame(pOutputFormatCtx, &outAudioPacket);
//returnVal = av_write_frame(pOutputFormatCtx, &outAudioPacket);
if (returnVal != 0)
{
std::cout << "Cannot write audio packet \n";
isConnected = false;
}
av_free_packet(&outAudioPacket);
} // if (gotPacket)
}
You can see after resample i free used buffers.
// free buffers
av_freep(&destinationData[0]);
av_freep(&destinationData);

Transcoding audio using xuggler

I am trying to convert an audio file with the header
Opening audio decoder: [pcm] Uncompressed PCM audio decoder
AUDIO: 44100 Hz, 2 ch, s16le, 1411.2 kbit/100.00% (ratio: 176400->176400)
Selected audio codec: [pcm] afm: pcm (Uncompressed PCM)
I want to transcode this file to mp3 format. I have following code snippet but its not working well. I have written it using XUGGLER code snippet for transcoding audio and video.
Audio decoder is
audioDecoder = IStreamCoder.make(IStreamCoder.Direction.DECODING, ICodec.findDecodingCodec(ICodec.ID.CODEC_ID_PCM_S16LE));
audioDecoder.setSampleRate(44100);
audioDecoder.setBitRate(176400);
audioDecoder.setChannels(2);
audioDecoder.setTimeBase(IRational.make(1,1000));
if (audioDecoder.open(IMetaData.make(), IMetaData.make()) < 0)
return false;
return true;
Audio encoder is
outContainer = IContainer.make();
outContainerFormat = IContainerFormat.make();
outContainerFormat.setOutputFormat("mp3", urlOut, null);
int retVal = outContainer.open(urlOut, IContainer.Type.WRITE, outContainerFormat);
if (retVal < 0) {
System.out.println("Could not open output container");
return false;
}
outAudioCoder = IStreamCoder.make(IStreamCoder.Direction.ENCODING, ICodec.findEncodingCodec(ICodec.ID.CODEC_ID_MP3));
outAudioStream = outContainer.addNewStream(outAudioCoder);
outAudioCoder.setSampleRate(new Integer(44100));
outAudioCoder.setChannels(2);
retVal = outAudioCoder.open(IMetaData.make(), IMetaData.make());
if (retVal < 0) {
System.out.println("Could not open audio coder");
return false;
}
retVal = outContainer.writeHeader();
if (retVal < 0) {
System.out.println("Could not write output FLV header: ");
return false;
}
return true;
And here is encode method where i send packets of 32 byte to transcode
public void encode(byte[] audioFrame){
//duration of 1 video frame
long lastVideoPts = 0;
IPacket packet_out = IPacket.make();
int lastPos = 0;
int lastPos_out = 0;
IAudioSamples audioSamples = IAudioSamples.make(48000, audioDecoder.getChannels());
IAudioSamples audioSamples_resampled = IAudioSamples.make(48000, audioDecoder.getChannels());
//we always have 32 bytes/sample
int pos = 0;
int audioFrameLength = audioFrame.length;
int audioFrameCnt = 1;
iBuffer = IBuffer.make(null, audioFrame, 0, audioFrameLength);
IPacket packet = IPacket.make(iBuffer);
//packet.setKeyPacket(true);
packet.setTimeBase(IRational.make(1,1000));
packet.setDuration(20);
packet.setDts(audioFrameCnt*20);
packet.setPts(audioFrameCnt*20);
packet.setStreamIndex(1);
packet.setPosition(lastPos);
lastPos+=audioFrameLength;
int pksz = packet.getSize();
packet.setComplete(true, pksz);
/*
* A packet can actually contain multiple samples
*/
int offset = 0;
int retVal;
while(offset < packet.getSize())
{
int bytesDecoded = audioDecoder.decodeAudio(audioSamples, packet, offset);
if (bytesDecoded < 0)
throw new RuntimeException("got error decoding audio ");
offset += bytesDecoded;
if (audioSamples.isComplete())
{
int samplesConsumed = 0;
while (samplesConsumed < audioSamples.getNumSamples()) {
retVal = outAudioCoder.encodeAudio(packet_out, audioSamples, samplesConsumed);
if (retVal <= 0)
throw new RuntimeException("Could not encode audio");
samplesConsumed += retVal;
if (packet_out.isComplete()) {
packet_out.setPosition(lastPos_out);
packet_out.setStreamIndex(1);
lastPos_out+=packet_out.getSize();
retVal = outContainer.writePacket(packet_out);
if(retVal < 0){
throw new RuntimeException("Could not write data packet");
}
}
}
}
}
}
I get an output file but it doesnt get played. I have very little experience of audio encoding and sampling. Thanks in advance.
Finally I am able to live transcode a pcm stream to mp3 stream.
There were couple of issues in the code :
I was trying to transcode only one thing i.e audio and the code snippet was transcoding audio as well as video so there was an issue in setting stream index.
packet_out.setStreamIndex(1); packet_out.setStreamIndex(0)
Second thing was calculations ffmpeg guid
channel * bits * sampling rate = bit rate
This thing was miscalculated at my end.
Number of samples in audio samples depends upon your sampling rate. That was wrong in my code.
NOTE : this is a pretty old code
byte[] data = new byte[418];
public void encode(byte[] audioFrame) {
IPacket packet_out = IPacket.make();
int lastPos_out = 0;
IAudioSamples audioSamples = IAudioSamples.make(11025, audioDecoder.getChannels());
//IAudioSamples audioSamples_resampled = IAudioSamples.make(48000, audioDecoder.getChannels());
//we always have 32 bytes/sample
int pos = 0;
int audioFrameLength = audioFrame.length;
int audioFrameCnt = 1;
iBuffer = IBuffer.make(null, audioFrame, 0, audioFrameLength);
IPacket packet = IPacket.make(iBuffer);
//packet.setKeyPacket(true);
packet.setTimeBase(IRational.make(1, 1000));
packet.setStreamIndex(0);
int pksz = packet.getSize();
packet.setComplete(true, pksz);
/*
* A packet can actually contain multiple samples
*/
int offset = 0;
int retVal;
while (offset < packet.getSize()) {
int bytesDecoded = audioDecoder.decodeAudio(audioSamples, packet, offset);
if (bytesDecoded < 0)
throw new RuntimeException("got error decoding audio ");
offset += bytesDecoded;
if (audioSamples.isComplete()) {
/*audioResampler.resample(audioSamples_resampled, audioSamples, audioSamples.getNumSamples());
audioSamples_resampled.setPts(Global.NO_PTS);*/
int samplesConsumed = 0;
while (samplesConsumed < audioSamples.getNumSamples()) {
retVal = outAudioCoder.encodeAudio(packet_out, audioSamples, samplesConsumed);
if (retVal <= 0)
throw new RuntimeException("Could not encode audio");
samplesConsumed += retVal;
if (packet_out.isComplete()) {
packet_out.setPosition(lastPos_out);
packet_out.setStreamIndex(0);
lastPos_out += packet_out.getSize();
System.out.println("size" + packet_out.getSize());
packet_out.getByteBuffer().get(data,0,packet_out.getSize());
try {
fo.write(data);
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
packet_out.reset();
//retVal = outContainer.writePacket(packet_out);
if (retVal < 0) {
throw new RuntimeException("Could not write data packet");
}
}
}
}
}
}

Resources